    def after_search(self, search_results, search_params):
        ## #####################################################################
        # This method moves the dcatapit fields back into the extras array
        # (needed by the CKAN harvester).
        # It dynamically reverts what 'convert_from_extras' does, so that this
        # plugin's custom fields can be harvested.
        ## #####################################################################
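        # For example, a top-level value such as _dict['frequency'] = 'ANNUAL'
        # (field name and value are illustrative) is appended to extras as
        # {'key': 'frequency', 'value': 'ANNUAL'} and removed from the top level
        # by manage_extras_for_search() below.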
        search_dicts = search_results.get('results', [])

        dcatapit_schema_fields = dcatapit_schema.get_custom_package_schema()

        for _dict in search_dicts:
            _dict_extras = _dict.get('extras', None)

            if not _dict_extras:
                _dict_extras = []
                _dict['extras'] = _dict_extras

            for field in dcatapit_schema_fields:
                field_couple = field.get('couples', [])
                if len(field_couple) > 0:
                    for couple in field_couple:
                        self.manage_extras_for_search(couple, _dict, _dict_extras)
                else:
                    self.manage_extras_for_search(field, _dict, _dict_extras)

            # remove holder info if pkg is local, use org as a source
            # see https://github.com/geosolutions-it/ckanext-dcatapit/pull/213#issuecomment-410668740
            _dict['dataset_is_local'] = helpers.dataset_is_local(_dict['id'])
            if _dict['dataset_is_local']:
                _dict.pop('holder_identifier', None)
                _dict.pop('holder_name', None)
            self._update_pkg_rights_holder(_dict)

        lang = interfaces.get_language()
        facets = search_results['search_facets']
        if 'dcat_theme' in facets:
            themes = facets['dcat_theme']
            for item in themes['items']:
                name = item['name']
                label = interfaces.get_localized_tag_name(tag_name=name, lang=lang)
                item['display_name'] = label

        return search_results

    def after_show(self, context, pkg_dict):
        schema = dcatapit_schema.get_custom_package_schema()
        # quick hack on date fields that are in the wrong format
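        # e.g. an ISO-style value like '2021-03-01' (illustrative input) is
        # re-rendered with the field's 'format' (default '%d-%m-%Y') as '01-03-2021'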
        for fdef in schema:
            if fdef.get('type') != 'date':
                continue
            fname = fdef['name']
            df_value = pkg_dict.get(fname)
            if df_value:
                tmp_value = validators.parse_date(df_value, df_value)
                if isinstance(tmp_value, datetime.date):
                    try:
                        tmp_value = tmp_value.strftime(fdef.get('format') or '%d-%m-%Y')
                    except ValueError as err:
                        log.warning('dataset %s, field %s: cannot reformat date for %s (from input %s): %s',
                                    pkg_dict['name'], fname, tmp_value, df_value, err, exc_info=err)
                        tmp_value = df_value
                pkg_dict[fname] = tmp_value

        # Themes are parsed by dcat, which expects a list of URIs,
        # while we store them in an aggregate format like this:
        # [{"theme": "AGRI", "subthemes": ["http://eurovoc.europa.eu/100253", "http://eurovoc.europa.eu/100258"]},
        # {"theme": "ENVI", "subthemes": []}]
        # We need to convert between the two.
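        # Expected outcome (a sketch, assuming theme_name_to_uri maps each theme
        # code to its vocabulary URI): the aggregate above yields a 'theme' extra
        # whose value is a JSON list with one URI for AGRI and one for ENVI.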

        if not context.get('for_view'):
            if not any(x['key'] == 'theme' for x in pkg_dict.get('extras', [])):
                # there's no theme, add the list from the aggregate
                aggr_raw = pkg_dict.get(FIELD_THEMES_AGGREGATE)
                if aggr_raw is None:
                    # let's try and find it in extras:
                    aggr_raw = next((x['value'] for x in pkg_dict.get('extras', [])
                                     if x['key'] == FIELD_THEMES_AGGREGATE), None)
                if aggr_raw is None:
                    log.error(f'No Aggregates in dataset {pkg_dict.get("id", "_")}')
                    aggr_raw = json.dumps([{'theme': 'OP_DATPRO', 'subthemes':[]}])
                    pkg_dict[FIELD_THEMES_AGGREGATE] = aggr_raw

                themes = []
                for aggr in json.loads(aggr_raw):
                    themes.append(theme_name_to_uri(aggr['theme']))

                extras = pkg_dict.get('extras', [])
                extras.append({'key': 'theme', 'value': json.dumps(themes)})
                pkg_dict['extras'] = extras

        # In some cases (automatic Solr indexing after an update) pkg_dict may
        # arrive without validation, so extras have not been converted into the
        # main dict. This ensures that holder keys are extracted into the main dict.
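        # e.g. an extras entry {'key': 'holder_identifier', 'value': 'xyz'}
        # (illustrative value) becomes pkg_dict['holder_identifier'] = 'xyz'
        # and is removed from extras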
        pkg_update = {}
        to_remove = []
        for eidx, ex in enumerate(pkg_dict.get('extras') or []):
            if ex['key'].startswith('holder_'):
                to_remove.append(eidx)
                pkg_update[ex['key']] = ex['value']

        for k in pkg_update.keys():
            if k in pkg_dict:
                if pkg_update[k] == pkg_dict[k]:
                    log.warning(f'Ignoring duplicated key {k} with same value {pkg_update[k]}')
                else:
                    raise KeyError(f'Duplicated key in pkg_dict: {k}: {pkg_update[k]} in extras'
                                   f' vs {pkg_dict[k]} in pkg')

        for tr in reversed(to_remove):
            val = pkg_dict['extras'].pop(tr)
            assert val['key'].startswith('holder_'), val
        pkg_dict.update(pkg_update)

        # remove holder info if pkg is local, use org as a source
        # see https://github.com/geosolutions-it/ckanext-dcatapit/pull/213#issuecomment-410668740
        pkg_dict['dataset_is_local'] = helpers.dataset_is_local(pkg_dict['id'])
        if pkg_dict['dataset_is_local']:
            pkg_dict.pop('holder_identifier', None)
            pkg_dict.pop('holder_name', None)
        return self._update_pkg_rights_holder(pkg_dict)
Example #3
class DCATAPITPackagePlugin(plugins.SingletonPlugin,
                            toolkit.DefaultDatasetForm, DefaultTranslation):

    # IDatasetForm
    plugins.implements(plugins.IDatasetForm)

    # IConfigurer
    plugins.implements(plugins.IConfigurer)

    # IValidators
    plugins.implements(plugins.IValidators)

    # ITemplateHelpers
    plugins.implements(plugins.ITemplateHelpers)

    # IRoutes
    plugins.implements(plugins.IRoutes, inherit=True)

    # IPackageController
    plugins.implements(plugins.IPackageController, inherit=True)

    plugins.implements(plugins.IFacets, inherit=True)

    # ITranslation
    if toolkit.check_ckan_version(min_version='2.5.0'):
        plugins.implements(plugins.ITranslation, inherit=True)

    # ------------- ITranslation ---------------#

    def i18n_domain(self):
        '''Change the gettext domain handled by this plugin.
        This implementation assumes the gettext domain is
        ckanext-{extension name}, hence your pot, po and mo files should be
        named ckanext-{extension name}.mo'''
        return 'ckanext-{name}'.format(name='dcatapit')

    # ------------- IRoutes ---------------#

    def before_map(self, map):
        GET = dict(method=['GET'])

        # /api/util ver 1, 2 or none
        with SubMapper(
                map,
                controller=
                'ckanext.dcatapit.controllers.api:DCATAPITApiController',
                path_prefix='/api{ver:/1|/2|}',
                ver='/1') as m:
            m.connect('/util/vocabulary/autocomplete',
                      action='vocabulary_autocomplete',
                      conditions=GET)
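        # the mapping above exposes e.g. GET /api/2/util/vocabulary/autocomplete
        # (the {ver} fragment also allows /api/1/... and plain /api/...)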
        return map

    # ------------- IConfigurer ---------------#

    def update_config(self, config_):
        toolkit.add_template_directory(config_, 'templates')
        toolkit.add_public_directory(config_, 'public')
        toolkit.add_resource('fanstatic', 'ckanext-dcatapit')

    # ------------- IDatasetForm ---------------#

    def update_schema_field(self, schema, field):
        validators = []
        for validator in field['validator']:
            validators.append(toolkit.get_validator(validator))

        converters = [toolkit.get_converter('convert_to_extras')]

        schema.update({field['name']: validators + converters})
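        # e.g. an illustrative field {'name': 'frequency', 'validator': ['not_empty']}
        # ends up as schema['frequency'] = [not_empty, convert_to_extras]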

    def _modify_package_schema(self, schema):

        ##
        # Getting custom package schema
        ##

        for field in dcatapit_schema.get_custom_package_schema():
            if 'ignore' in field and field['ignore'] == True:
                continue

            if 'couples' in field:
                for couple in field['couples']:
                    self.update_schema_field(schema, couple)
            else:
                self.update_schema_field(schema, field)

        schema.update({'notes': [toolkit.get_validator('not_empty')]})

        ##
        # Getting custom resource schema
        ##

        for field in dcatapit_schema.get_custom_resource_schema():
            if 'ignore' in field and field['ignore'] == True:
                continue

            validators = []
            for validator in field['validator']:
                validators.append(toolkit.get_validator(validator))

            schema['resources'].update({field['name']: validators})

        # conditionally include schema fields from MultilangResourcesPlugin
        if MLR:
            schema = MLR.update_schema(schema)

        log.debug("Schema updated for DCAT_AP-TI:  %r", schema)
        return schema

    def create_package_schema(self):
        schema = super(DCATAPITPackagePlugin, self).create_package_schema()
        schema = self._modify_package_schema(schema)
        return schema

    def update_package_schema(self):
        schema = super(DCATAPITPackagePlugin, self).update_package_schema()
        schema = self._modify_package_schema(schema)
        return schema

    def update_show_schema_field(self, schema, field):
        validators = []
        for validator in field['validator']:
            validators.append(toolkit.get_validator(validator))

        converters = [toolkit.get_converter('convert_from_extras')]

        schema.update({field['name']: converters + validators})

    def show_package_schema(self):
        schema = super(DCATAPITPackagePlugin, self).show_package_schema()

        ##
        # Getting custom package schema
        ##

        for field in dcatapit_schema.get_custom_package_schema():
            if 'ignore' in field and field['ignore'] == True:
                continue

            if 'couples' in field:
                for couple in field['couples']:
                    self.update_show_schema_field(schema, couple)
            else:
                self.update_show_schema_field(schema, field)

        schema.update({'notes': [toolkit.get_validator('not_empty')]})

        ##
        # Getting custom resource schema
        ##

        for field in dcatapit_schema.get_custom_resource_schema():
            if 'ignore' in field and field['ignore'] == True:
                continue

            validators = []
            for validator in field['validator']:
                validators.append(toolkit.get_validator(validator))

            schema['resources'].update({field['name']: validators})

        # conditionally include schema fields from MultilangResourcesPlugin
        if MLR:
            schema = MLR.update_schema(schema)
        log.debug("Schema updated for DCAT_AP-TI:  %r", schema)

        return schema

    def is_fallback(self):
        # Return True to register this plugin as the default handler for
        # package types not handled by any other IDatasetForm plugin.
        return True

    def package_types(self):
        # This plugin doesn't handle any special package types, it just
        # registers itself as the default (above).
        return []

    if MLR:

        def read_template(self):
            return MLR.read_template()

        def edit_template(self):
            return MLR.edit_template()

        def resource_form(self):
            return MLR.resource_form()

    # ------------- IValidators ---------------#

    def get_validators(self):
        return {
            'couple_validator': validators.couple_validator,
            'no_number': validators.no_number,
            'dcatapit_id_unique': validators.dcatapit_id_unique,
            'dcatapit_conforms_to': validators.dcatapit_conforms_to,
            'dcatapit_alternate_identifier':
            validators.dcatapit_alternate_identifier,
            'dcatapit_creator': validators.dcatapit_creator,
            'dcatapit_temporal_coverage':
            validators.dcatapit_temporal_coverage,
            'dcatapit_subthemes': validators.dcatapit_subthemes,
        }

    # ------------- ITemplateHelpers ---------------#

    def get_helpers(self):
        dcatapit_helpers = {
            'get_dcatapit_package_schema': helpers.get_dcatapit_package_schema,
            'get_vocabulary_items': helpers.get_vocabulary_items,
            'get_vocabulary_item': helpers.get_vocabulary_item,
            'get_dcatapit_resource_schema':
            helpers.get_dcatapit_resource_schema,
            'list_to_string': helpers.list_to_string,
            'couple_to_html': helpers.couple_to_html,
            'couple_to_string': helpers.couple_to_string,
            'couple_to_dict': helpers.couple_to_dict,
            'format': helpers.format,
            'validate_dateformat': helpers.validate_dateformat,
            'get_localized_field_value': helpers.get_localized_field_value,
            'get_package_resource_dcatapit_format_list':
            helpers.get_package_resource_dcatapit_format_list,
            'get_resource_licenses_tree': helpers.get_resource_licenses_tree,
            'get_dcatapit_license': helpers.get_dcatapit_license,
            'load_json_or_list': helpers.load_json_or_list,
            'get_geonames_config': helpers.get_geonames_config,
            'load_dcatapit_subthemes': helpers.load_dcatapit_subthemes,
            'get_dcatapit_subthemes': helpers.get_dcatapit_subthemes,
            'dump_dcatapit_subthemes': helpers.dump_dcatapit_subthemes,
            'get_localized_subtheme': helpers.get_localized_subtheme,
            'dcatapit_enable_form_tabs': helpers.get_enable_form_tabs,
            'dcatapit_get_icustomschema_fields':
            helpers.get_icustomschema_fields,
        }

        if MLR:
            dcatapit_helpers.update(MLR.get_helpers())
        return dcatapit_helpers

    # ------------- IPackageController ---------------#

    def after_create(self, context, pkg_dict):
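        # For each custom schema field marked as localized that appears in the
        # package extras, persist a localized copy for the current request language.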
        # During harvesting, get_lang() is not defined
        lang = interfaces.get_language()
        otype = pkg_dict.get('type')
        if lang and otype == 'dataset':
            for extra in pkg_dict.get('extras') or []:
                for field in dcatapit_schema.get_custom_package_schema():

                    couples = field.get('couples', [])
                    if couples and len(couples) > 0:
                        for couple in couples:
                            if extra.get('key') == couple.get(
                                    'name', None) and couple.get(
                                        'localized', False) == True:
                                log.debug(
                                    ':::::::::::::::Localizing custom schema field: %r',
                                    couple['name'])
                                # Create the localized field record
                                self.create_loc_field(extra, lang,
                                                      pkg_dict.get('id'))
                    else:
                        if extra.get('key') == field.get(
                                'name', None) and field.get(
                                    'localized', False) == True:
                            log.debug(
                                ':::::::::::::::Localizing custom schema field: %r',
                                field['name'])
                            # Create the localized field record
                            self.create_loc_field(extra, lang,
                                                  pkg_dict.get('id'))

    def after_update(self, context, pkg_dict):
        # During harvesting, get_lang() is not defined
        lang = interfaces.get_language()
        otype = pkg_dict.get('type')

        if lang and otype == 'dataset':
            for extra in pkg_dict.get('extras') or []:
                for field in dcatapit_schema.get_custom_package_schema():
                    couples = field.get('couples', [])
                    if couples and len(couples) > 0:
                        for couple in couples:
                            self.update_loc_field(extra, pkg_dict.get('id'),
                                                  couple, lang)
                    else:
                        self.update_loc_field(extra, pkg_dict.get('id'), field,
                                              lang)

    def before_index(self, dataset_dict):
        '''
        Insert `dcat_theme` into solr
        '''
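        # e.g. extras_theme = '[{"theme": "AGRI", "subthemes": ["http://eurovoc.europa.eu/100253"]}]'
        # (illustrative value) would index dcat_theme = ['AGRI'] and
        # dcat_subtheme = ['http://eurovoc.europa.eu/100253']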

        extra_theme = dataset_dict.get("extras_theme", None) or ''
        themes = helpers.dump_dcatapit_subthemes(extra_theme)
        search_terms = [t['theme'] for t in themes]
        if search_terms:
            dataset_dict['dcat_theme'] = search_terms

        search_subthemes = []
        for t in themes:
            search_subthemes.extend(t.get('subthemes') or [])

        if search_subthemes:
            dataset_dict['dcat_subtheme'] = search_subthemes
            localized_subthemes = interfaces.get_localized_subthemes(
                search_subthemes)
            for lang, subthemes in localized_subthemes.items():
                dataset_dict['dcat_subtheme_{}'.format(lang)] = subthemes
        ddict = json.loads(dataset_dict['data_dict'])
        resources = ddict.get('resources') or []
        _licenses = list(
            set([
                r.get('license_type') for r in resources
                if r.get('license_type')
            ]))

        licenses = []
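        # For each known license, index one localized name per language key,
        # e.g. resource_license_it / resource_license_en (the language codes
        # shown here are assumptions; they depend on the loaded vocabulary).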
        for l in _licenses:
            lic = License.get(l)
            if lic:
                for loclic in lic.get_names():
                    lname = loclic['name']
                    lang = loclic['lang']
                    if lname:
                        dataset_dict['resource_license_{}'.format(
                            lang)] = lname
            else:
                log.warning('Bad license: license not found: %r', l)
        dataset_dict['resource_license'] = _licenses

        org_id = dataset_dict['owner_org']
        organization_show = plugins.toolkit.get_action('organization_show')
        if org_id:
            org = organization_show(
                get_org_context(), {
                    'id': org_id,
                    'include_tags': False,
                    'include_users': False,
                    'include_groups': False,
                    'include_extras': True,
                    'include_followers': False,
                    'include_datasets': False,
                })
        else:
            org = {}
        if org.get('region'):

            # multilang values
            # note region can be in {val1,val2} notation for multiple values
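            # e.g. an illustrative value '{LAZ,TOS}' is normalized to
            # ['LAZ', 'TOS'] before looking up the localized tag labels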
            region_base = org['region']
            if not isinstance(region_base, (
                    list,
                    tuple,
            )):
                region_base = region_base.strip('{}').split(',')
            tags = {}

            for region_name in region_base:
                ltags = interfaces.get_all_localized_tag_labels(region_name)
                for tlang, tvalue in ltags.items():
                    try:
                        tags[tlang].append(tvalue)
                    except KeyError:
                        tags[tlang] = [tvalue]

            for lang, region in tags.items():
                dataset_dict['organization_region_{}'.format(lang)] = region

        self._update_pkg_rights_holder(dataset_dict, org=org)
        return dataset_dict

    def before_search(self, search_params):
        '''
        # this code may be needed with different versions of solr
        
        fq_all = [] 
       
        if isinstance(search_params['fq'], (str,unicode,)):
            fq = [search_params['fq']]
        else:
            fq = search_params['fq']
        if fq and fq[0] and not fq[0].startswith(('+', '-')):
            fq[0] = u'+{}'.format(fq[0])
        search_params['fq'] = ' '.join(fq)
        '''

        return search_params

    def after_search(self, search_results, search_params):
        ## #####################################################################
        # This method moves the dcatapit fields back into the extras array
        # (needed by the CKAN harvester).
        # It dynamically reverts what 'convert_from_extras' does, so that this
        # plugin's custom fields can be harvested.
        ## #####################################################################
        search_dicts = search_results.get('results', [])

        dcatapit_schema_fields = dcatapit_schema.get_custom_package_schema()

        for _dict in search_dicts:
            _dict_extras = _dict.get('extras', None)

            if not _dict_extras:
                _dict_extras = []
                _dict['extras'] = _dict_extras

            for field in dcatapit_schema_fields:
                field_couple = field.get('couples', [])
                if len(field_couple) > 0:
                    for couple in field_couple:
                        self.manage_extras_for_search(couple, _dict,
                                                      _dict_extras)
                else:
                    self.manage_extras_for_search(field, _dict, _dict_extras)

            # remove holder info if pkg is local, use org as a source
            # see https://github.com/geosolutions-it/ckanext-dcatapit/pull/213#issuecomment-410668740
            _dict['dataset_is_local'] = helpers.dataset_is_local(_dict['id'])
            #if _dict['dataset_is_local']:
            #_dict.pop('holder_identifier', None)
            #_dict.pop('holder_name', None)
            self._update_pkg_rights_holder(_dict)

        return search_results

    def manage_extras_for_search(self, field, _dict, _dict_extras):
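        # Move a single schema field value from the top-level package dict back
        # into the 'extras' list and drop the top-level key.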
        field_name = field.get('name', None)

        if field_name and field_name in _dict:
            field_value = _dict.get(field_name, None)
            _dict_extras.append({'key': field_name, 'value': field_value})
            del _dict[field_name]

    def update_loc_field(self, extra, pkg_id, field, lang):
        interfaces.update_extra_package_multilang(extra, pkg_id, field, lang)

    def create_loc_field(self, extra, lang, pkg_id):
        interfaces.save_extra_package_multilang(
            {
                'id': pkg_id,
                'text': extra.get('value'),
                'field': extra.get('key')
            }, lang, 'extra')

    def before_view(self, pkg_dict):
        return self._update_pkg_rights_holder(pkg_dict)

    def after_show(self, context, pkg_dict):
        schema = dcatapit_schema.get_custom_package_schema()
        # quick hack on date fields that are in the wrong format
        for fdef in schema:
            if fdef.get('type') != 'date':
                continue
            fname = fdef['name']
            df_value = pkg_dict.get(fname)
            if df_value:
                tmp_value = validators.parse_date(df_value, df_value)
                if isinstance(tmp_value, datetime.date):
                    try:
                        tmp_value = tmp_value.strftime(
                            fdef.get('format') or '%d-%m-%Y')
                    except ValueError as err:
                        log.warning(
                            "dataset %s, field %s: cannot reformat date for %s (from input %s): %s",
                            pkg_dict['name'],
                            fname,
                            tmp_value,
                            df_value,
                            err,
                            exc_info=err)
                        tmp_value = df_value
                pkg_dict[fname] = tmp_value

        # In some cases (automatic Solr indexing after an update) pkg_dict may
        # arrive without validation, so extras have not been converted into the
        # main dict. This ensures that holder keys are extracted into the main dict.
        pkg_update = {}
        to_remove = []
        for eidx, ex in enumerate(pkg_dict.get('extras') or []):
            if ex['key'].startswith('holder_'):
                to_remove.append(eidx)
                pkg_update[ex['key']] = ex['value']

        for k in pkg_update.keys():
            if k in pkg_dict:
                raise KeyError(
                    "Duplicated key in pkg_dict: {}: {} in extras vs {} in pkg"
                    .format(k, pkg_update[k], pkg_dict[k]))
        for tr in reversed(to_remove):
            val = pkg_dict['extras'].pop(tr)
            assert val['key'].startswith('holder_'), val
        pkg_dict.update(pkg_update)

        # remove holder info if pkg is local, use org as a source
        # see https://github.com/geosolutions-it/ckanext-dcatapit/pull/213#issuecomment-410668740
        pkg_dict['dataset_is_local'] = helpers.dataset_is_local(pkg_dict['id'])
        #if pkg_dict['dataset_is_local']:
        #pkg_dict.pop('holder_identifier', None)
        #pkg_dict.pop('holder_name', None)
        return self._update_pkg_rights_holder(pkg_dict)