def after_search(self, search_results, search_params):
    '''Move the dcatapit custom fields back into the ``extras`` array.

    This dynamically reverts what ``convert_from_extras`` did, so the CKAN
    harvester can pick up this plugin's custom fields from ``extras``.
    It also localizes the ``dcat_theme`` facet display names for the
    current request language.

    :param search_results: dict with ``results`` and ``search_facets``
    :param search_params: the search parameters (unused here)
    :returns: the mutated ``search_results`` dict
    '''
    search_dicts = search_results.get('results', [])
    dcatapit_schema_fields = dcatapit_schema.get_custom_package_schema()

    for _dict in search_dicts:
        _dict_extras = _dict.get('extras', None)
        if not _dict_extras:
            _dict_extras = []
            _dict['extras'] = _dict_extras

        for field in dcatapit_schema_fields:
            field_couple = field.get('couples', [])
            if field_couple:
                for couple in field_couple:
                    self.manage_extras_for_search(couple, _dict, _dict_extras)
            else:
                self.manage_extras_for_search(field, _dict, _dict_extras)

        # remove holder info if pkg is local, use org as a source
        # see https://github.com/geosolutions-it/ckanext-dcatapit/pull/213#issuecomment-410668740
        _dict['dataset_is_local'] = helpers.dataset_is_local(_dict['id'])
        if _dict['dataset_is_local']:
            _dict.pop('holder_identifier', None)
            _dict.pop('holder_name', None)
        self._update_pkg_rights_holder(_dict)

    # Localize the dcat_theme facet labels for the current language.
    # .get() keeps this safe when no facets were requested.
    lang = interfaces.get_language()
    facets = search_results.get('search_facets') or {}
    if 'dcat_theme' in facets:
        for item in facets['dcat_theme']['items']:
            item['display_name'] = interfaces.get_localized_tag_name(
                tag_name=item['name'], lang=lang)

    return search_results
def after_show(self, context, pkg_dict):
    '''Normalize a package dict after ``package_show``.

    Steps performed:

    - reformat date fields that come in a wrong format;
    - outside of 'for_view' contexts, rebuild the ``theme`` extra (a JSON
      list of theme URIs, as expected by ckanext-dcat) from the themes
      aggregate when it is missing, falling back to ``OP_DATPRO``;
    - extract ``holder_*`` extras into the main dict (pkg_dict may arrive
      without validation, e.g. on automatic solr indexing after update);
    - drop holder info for local datasets, since the organization is used
      as the source instead, see
      https://github.com/geosolutions-it/ckanext-dcatapit/pull/213#issuecomment-410668740

    :returns: the mutated pkg_dict, post-processed by
        ``_update_pkg_rights_holder``
    '''
    schema = dcatapit_schema.get_custom_package_schema()

    # quick hack on date fields that are in wrong format
    for fdef in schema:
        if fdef.get('type') != 'date':
            continue
        fname = fdef['name']
        df_value = pkg_dict.get(fname)
        if df_value:
            tmp_value = validators.parse_date(df_value, df_value)
            if isinstance(tmp_value, datetime.date):
                try:
                    tmp_value = tmp_value.strftime(fdef.get('format') or '%d-%m-%Y')
                except ValueError as err:
                    log.warning('dataset %s, field %s: cannot reformat date for %s (from input %s): %s',
                                pkg_dict['name'], fname, tmp_value, df_value, err,
                                exc_info=err)
                    tmp_value = df_value
            pkg_dict[fname] = tmp_value

    # themes are parsed by dcat, which requires a list of URI
    # we have the format like this:
    # [{"theme": "AGRI", "subthemes": ["http://eurovoc.europa.eu/100253", "http://eurovoc.europa.eu/100258"]},
    #  {"theme": "ENVI", "subthemes": []}]
    # We need to fix this.
    if not context.get('for_view'):
        if not any(x['key'] == 'theme' for x in pkg_dict.get('extras', [])):
            # there's no theme, add the list from the aggregate
            aggr_raw = pkg_dict.get(FIELD_THEMES_AGGREGATE)
            if aggr_raw is None:
                # let's try and find it in extras:
                aggr_raw = next((x['value'] for x in pkg_dict.get('extras', [])
                                 if x['key'] == FIELD_THEMES_AGGREGATE), None)
                if aggr_raw is None:
                    log.error(f'No Aggregates in dataset {pkg_dict.get("id", "_")}')
                    aggr_raw = json.dumps([{'theme': 'OP_DATPRO', 'subthemes': []}])
                pkg_dict[FIELD_THEMES_AGGREGATE] = aggr_raw
            themes = [theme_name_to_uri(aggr['theme']) for aggr in json.loads(aggr_raw)]
            extras = pkg_dict.get('extras', [])
            extras.append({'key': 'theme', 'value': json.dumps(themes)})
            pkg_dict['extras'] = extras

    # in some cases (automatic solr indexing after update)
    # pkg_dict may come without validation and thus
    # without extras converted to main dict.
    # this will ensure that holder keys are extracted to main dict
    pkg_update = {}
    to_remove = []
    for eidx, ex in enumerate(pkg_dict.get('extras') or []):
        if ex['key'].startswith('holder_'):
            to_remove.append(eidx)
            pkg_update[ex['key']] = ex['value']

    # A holder key already present in pkg_dict is tolerated only when the
    # value matches; otherwise it is a real conflict and we fail loudly.
    for k, v in pkg_update.items():
        if k in pkg_dict:
            if v == pkg_dict[k]:
                log.warning(f'Ignoring duplicated key {k} with same value {pkg_update[k]}')
            else:
                raise KeyError(f'Duplicated key in pkg_dict: {k}: {pkg_update[k]} in extras'
                               f' vs {pkg_dict[k]} in pkg')

    # pop from the highest index down so the remaining indices stay valid
    for tr in reversed(to_remove):
        val = pkg_dict['extras'].pop(tr)
        assert val['key'].startswith('holder_'), val
    pkg_dict.update(pkg_update)

    # remove holder info if pkg is local, use org as a source
    # see https://github.com/geosolutions-it/ckanext-dcatapit/pull/213#issuecomment-410668740
    pkg_dict['dataset_is_local'] = helpers.dataset_is_local(pkg_dict['id'])
    if pkg_dict['dataset_is_local']:
        pkg_dict.pop('holder_identifier', None)
        pkg_dict.pop('holder_name', None)

    return self._update_pkg_rights_holder(pkg_dict)
class DCATAPITPackagePlugin(plugins.SingletonPlugin, toolkit.DefaultDatasetForm, DefaultTranslation):
    '''Dataset-form plugin adding the DCAT_AP-IT custom schema to CKAN packages.

    Hooks into the dataset form (IDatasetForm), search indexing and the
    package show/search flows (IPackageController) to move the custom
    fields in and out of the package extras and to localize
    vocabulary-backed values.
    '''

    # IDatasetForm
    plugins.implements(plugins.IDatasetForm)
    # IConfigurer
    plugins.implements(plugins.IConfigurer)
    # IValidators
    plugins.implements(plugins.IValidators)
    # ITemplateHelpers
    plugins.implements(plugins.ITemplateHelpers)
    # IRoutes
    plugins.implements(plugins.IRoutes, inherit=True)
    # IPackageController
    plugins.implements(plugins.IPackageController, inherit=True)
    plugins.implements(plugins.IFacets, inherit=True)
    # ITranslation
    if toolkit.check_ckan_version(min_version='2.5.0'):
        plugins.implements(plugins.ITranslation, inherit=True)

    # ------------- ITranslation ---------------#

    def i18n_domain(self):
        '''Change the gettext domain handled by this plugin

        This implementation assumes the gettext domain is
        ckanext-{extension name}, hence your pot, po and mo files should be
        named ckanext-{extension name}.mo'''
        return 'ckanext-{name}'.format(name='dcatapit')

    # ------------- IRoutes ---------------#

    def before_map(self, map):
        '''Register the vocabulary autocomplete API endpoint.'''
        GET = dict(method=['GET'])

        # /api/util ver 1, 2 or none
        with SubMapper(
                map,
                controller='ckanext.dcatapit.controllers.api:DCATAPITApiController',
                path_prefix='/api{ver:/1|/2|}',
                ver='/1') as m:
            m.connect('/util/vocabulary/autocomplete',
                      action='vocabulary_autocomplete',
                      conditions=GET)
        return map

    # ------------- IConfigurer ---------------#

    def update_config(self, config_):
        '''Register this plugin's templates and static assets.'''
        toolkit.add_template_directory(config_, 'templates')
        toolkit.add_public_directory(config_, 'public')
        toolkit.add_resource('fanstatic', 'ckanext-dcatapit')

    # ------------- IDatasetForm ---------------#

    def update_schema_field(self, schema, field):
        '''Add a custom field to the create/update schema.

        The field's validators are applied first, then the value is
        stored as a package extra via convert_to_extras.
        '''
        field_validators = [toolkit.get_validator(v) for v in field['validator']]
        converters = [toolkit.get_converter('convert_to_extras')]
        schema.update({field['name']: field_validators + converters})

    def _modify_package_schema(self, schema):
        '''Apply the dcatapit custom package and resource fields to `schema`.'''
        # Custom package schema: fields flagged 'ignore' are skipped;
        # 'couples' fields expand to one schema entry per couple member.
        for field in dcatapit_schema.get_custom_package_schema():
            if field.get('ignore') == True:  # noqa: E712 -- keep original loose match
                continue
            if 'couples' in field:
                for couple in field['couples']:
                    self.update_schema_field(schema, couple)
            else:
                self.update_schema_field(schema, field)

        # DCAT_AP-IT requires a non-empty description.
        schema.update({'notes': [toolkit.get_validator('not_empty')]})

        # Custom resource schema (validators only, no extras conversion).
        for field in dcatapit_schema.get_custom_resource_schema():
            if field.get('ignore') == True:  # noqa: E712
                continue
            field_validators = [toolkit.get_validator(v) for v in field['validator']]
            schema['resources'].update({field['name']: field_validators})

        # conditionally include schema fields from MultilangResourcesPlugin
        if MLR:
            schema = MLR.update_schema(schema)

        log.debug("Schema updated for DCAT_AP-TI: %r", schema)
        return schema

    def create_package_schema(self):
        schema = super().create_package_schema()
        return self._modify_package_schema(schema)

    def update_package_schema(self):
        schema = super().update_package_schema()
        return self._modify_package_schema(schema)

    def update_show_schema_field(self, schema, field):
        '''Add a custom field to the show schema.

        The value is read back from extras first (convert_from_extras),
        then the field's validators are applied.
        '''
        field_validators = [toolkit.get_validator(v) for v in field['validator']]
        converters = [toolkit.get_converter('convert_from_extras')]
        schema.update({field['name']: converters + field_validators})

    def show_package_schema(self):
        schema = super().show_package_schema()

        # Custom package schema (mirror of _modify_package_schema, but
        # reading values back from extras).
        for field in dcatapit_schema.get_custom_package_schema():
            if field.get('ignore') == True:  # noqa: E712
                continue
            if 'couples' in field:
                for couple in field['couples']:
                    self.update_show_schema_field(schema, couple)
            else:
                self.update_show_schema_field(schema, field)

        schema.update({'notes': [toolkit.get_validator('not_empty')]})

        # Custom resource schema
        for field in dcatapit_schema.get_custom_resource_schema():
            if field.get('ignore') == True:  # noqa: E712
                continue
            field_validators = [toolkit.get_validator(v) for v in field['validator']]
            schema['resources'].update({field['name']: field_validators})

        # conditionally include schema fields from MultilangResourcesPlugin
        if MLR:
            schema = MLR.update_schema(schema)

        log.debug("Schema updated for DCAT_AP-TI: %r", schema)
        return schema

    def is_fallback(self):
        # Return True to register this plugin as the default handler for
        # package types not handled by any other IDatasetForm plugin.
        return True

    def package_types(self):
        # This plugin doesn't handle any special package types, it just
        # registers itself as the default (above).
        return []

    if MLR:
        # Delegate the dataset/resource form templates to the
        # MultilangResourcesPlugin when it is available.
        def read_template(self):
            return MLR.read_template()

        def edit_template(self):
            return MLR.edit_template()

        def resource_form(self):
            return MLR.resource_form()

    # ------------- IValidators ---------------#

    def get_validators(self):
        '''Expose the dcatapit custom validators to CKAN.'''
        return {
            'couple_validator': validators.couple_validator,
            'no_number': validators.no_number,
            'dcatapit_id_unique': validators.dcatapit_id_unique,
            'dcatapit_conforms_to': validators.dcatapit_conforms_to,
            'dcatapit_alternate_identifier': validators.dcatapit_alternate_identifier,
            'dcatapit_creator': validators.dcatapit_creator,
            'dcatapit_temporal_coverage': validators.dcatapit_temporal_coverage,
            'dcatapit_subthemes': validators.dcatapit_subthemes,
        }

    # ------------- ITemplateHelpers ---------------#

    def get_helpers(self):
        '''Expose the dcatapit template helpers to CKAN.'''
        dcatapit_helpers = {
            'get_dcatapit_package_schema': helpers.get_dcatapit_package_schema,
            'get_vocabulary_items': helpers.get_vocabulary_items,
            'get_vocabulary_item': helpers.get_vocabulary_item,
            'get_dcatapit_resource_schema': helpers.get_dcatapit_resource_schema,
            'list_to_string': helpers.list_to_string,
            'couple_to_html': helpers.couple_to_html,
            'couple_to_string': helpers.couple_to_string,
            'couple_to_dict': helpers.couple_to_dict,
            'format': helpers.format,
            'validate_dateformat': helpers.validate_dateformat,
            'get_localized_field_value': helpers.get_localized_field_value,
            'get_package_resource_dcatapit_format_list': helpers.get_package_resource_dcatapit_format_list,
            'get_resource_licenses_tree': helpers.get_resource_licenses_tree,
            'get_dcatapit_license': helpers.get_dcatapit_license,
            'load_json_or_list': helpers.load_json_or_list,
            'get_geonames_config': helpers.get_geonames_config,
            'load_dcatapit_subthemes': helpers.load_dcatapit_subthemes,
            'get_dcatapit_subthemes': helpers.get_dcatapit_subthemes,
            'dump_dcatapit_subthemes': helpers.dump_dcatapit_subthemes,
            'get_localized_subtheme': helpers.get_localized_subtheme,
            'dcatapit_enable_form_tabs': helpers.get_enable_form_tabs,
            'dcatapit_get_icustomschema_fields': helpers.get_icustomschema_fields,
        }
        if MLR:
            dcatapit_helpers.update(MLR.get_helpers())
        return dcatapit_helpers

    # ------------- IPackageController ---------------#

    def after_create(self, context, pkg_dict):
        '''Create localized records for the localizable custom fields.'''
        # During the harvest the get_lang() is not defined
        lang = interfaces.get_language()
        if not (lang and pkg_dict.get('type') == 'dataset'):
            return
        for extra in pkg_dict.get('extras') or []:
            for field in dcatapit_schema.get_custom_package_schema():
                couples = field.get('couples', [])
                if couples:
                    for couple in couples:
                        if (extra.get('key') == couple.get('name', None)
                                and couple.get('localized', False) == True):  # noqa: E712
                            log.debug(':::::::::::::::Localizing custom schema field: %r', couple['name'])
                            # Create the localized field record
                            self.create_loc_field(extra, lang, pkg_dict.get('id'))
                elif (extra.get('key') == field.get('name', None)
                        and field.get('localized', False) == True):  # noqa: E712
                    log.debug(':::::::::::::::Localizing custom schema field: %r', field['name'])
                    # Create the localized field record
                    self.create_loc_field(extra, lang, pkg_dict.get('id'))

    def after_update(self, context, pkg_dict):
        '''Update the localized records for the custom fields.'''
        # During the harvest the get_lang() is not defined
        lang = interfaces.get_language()
        if not (lang and pkg_dict.get('type') == 'dataset'):
            return
        for extra in pkg_dict.get('extras') or []:
            for field in dcatapit_schema.get_custom_package_schema():
                couples = field.get('couples', [])
                if couples:
                    for couple in couples:
                        self.update_loc_field(extra, pkg_dict.get('id'), couple, lang)
                else:
                    self.update_loc_field(extra, pkg_dict.get('id'), field, lang)

    def before_index(self, dataset_dict):
        '''
        Insert `dcat_theme` into solr
        '''
        extra_theme = dataset_dict.get("extras_theme", None) or ''
        themes = helpers.dump_dcatapit_subthemes(extra_theme)

        # Index theme codes (the original code assigned dcat_theme twice;
        # once is enough).
        search_terms = [t['theme'] for t in themes]
        if search_terms:
            dataset_dict['dcat_theme'] = search_terms

        search_subthemes = []
        for t in themes:
            search_subthemes.extend(t.get('subthemes') or [])

        if search_subthemes:
            dataset_dict['dcat_subtheme'] = search_subthemes
            localized_subthemes = interfaces.get_localized_subthemes(search_subthemes)
            for lang, subthemes in localized_subthemes.items():
                dataset_dict['dcat_subtheme_{}'.format(lang)] = subthemes

        # Index the resource license names, localized per language.
        ddict = json.loads(dataset_dict['data_dict'])
        resources = ddict.get('resources') or []
        _licenses = list({
            r.get('license_type')
            for r in resources if r.get('license_type')
        })
        for license_ref in _licenses:
            lic = License.get(license_ref)
            if lic:
                for loclic in lic.get_names():
                    lname = loclic['name']
                    lang = loclic['lang']
                    if lname:
                        dataset_dict['resource_license_{}'.format(lang)] = lname
            else:
                # log.warn is a deprecated alias of log.warning
                log.warning('Bad license: license not found: %r ', license_ref)
        dataset_dict['resource_license'] = _licenses

        # Fetch the owning organization to index its region and to
        # resolve the rights holder.
        org_id = dataset_dict['owner_org']
        organization_show = plugins.toolkit.get_action('organization_show')
        if org_id:
            org = organization_show(
                get_org_context(), {
                    'id': org_id,
                    'include_tags': False,
                    'include_users': False,
                    'include_groups': False,
                    'include_extras': True,
                    'include_followers': False,
                    'include_datasets': False,
                })
        else:
            org = {}

        if org.get('region'):
            # multilang values
            # note region can be in {val1,val2} notation for multiple values
            region_base = org['region']
            if not isinstance(region_base, (list, tuple,)):
                region_base = region_base.strip('{}').split(',')
            tags = {}
            for region_name in region_base:
                ltags = interfaces.get_all_localized_tag_labels(region_name)
                for tlang, tvalue in ltags.items():
                    tags.setdefault(tlang, []).append(tvalue)
            for lang, region in tags.items():
                dataset_dict['organization_region_{}'.format(lang)] = region

        self._update_pkg_rights_holder(dataset_dict, org=org)
        return dataset_dict

    def before_search(self, search_params):
        '''
        # this code may be needed with different versions of solr
        fq_all = []

        if isinstance(search_params['fq'], (str,unicode,)):
            fq = [search_params['fq']]
        else:
            fq = search_params['fq']
        if fq and fq[0] and not fq[0].startswith(('+', '-')):
            fq[0] = u'+{}'.format(fq[0])
        search_params['fq'] = ' '.join(fq)
        '''
        return search_params

    def after_search(self, search_results, search_params):
        '''Move the dcatapit custom fields back into the ``extras`` array.

        This is needed for the CKAN harvester: it dynamically reverts what
        'convert_from_extras' did, so this plugin's custom fields can be
        harvested from extras.
        '''
        search_dicts = search_results.get('results', [])
        dcatapit_schema_fields = dcatapit_schema.get_custom_package_schema()

        for _dict in search_dicts:
            _dict_extras = _dict.get('extras', None)
            if not _dict_extras:
                _dict_extras = []
                _dict['extras'] = _dict_extras

            for field in dcatapit_schema_fields:
                field_couple = field.get('couples', [])
                if field_couple:
                    for couple in field_couple:
                        self.manage_extras_for_search(couple, _dict, _dict_extras)
                else:
                    self.manage_extras_for_search(field, _dict, _dict_extras)

            # NOTE: removal of holder_identifier/holder_name for local
            # datasets was deliberately disabled here; see
            # https://github.com/geosolutions-it/ckanext-dcatapit/pull/213#issuecomment-410668740
            _dict['dataset_is_local'] = helpers.dataset_is_local(_dict['id'])
            self._update_pkg_rights_holder(_dict)

        return search_results

    def manage_extras_for_search(self, field, _dict, _dict_extras):
        '''Move `field` from the package dict into its extras list (in place).'''
        field_name = field.get('name', None)
        if field_name and field_name in _dict:
            field_value = _dict.get(field_name, None)
            _dict_extras.append({'key': field_name, 'value': field_value})
            del _dict[field_name]

    def update_loc_field(self, extra, pkg_id, field, lang):
        '''Update the multilang record backing `field` for package `pkg_id`.'''
        interfaces.update_extra_package_multilang(extra, pkg_id, field, lang)

    def create_loc_field(self, extra, lang, pkg_id):
        '''Create the multilang record backing `extra` for package `pkg_id`.'''
        interfaces.save_extra_package_multilang(
            {
                'id': pkg_id,
                'text': extra.get('value'),
                'field': extra.get('key')
            }, lang, 'extra')

    def before_view(self, pkg_dict):
        return self._update_pkg_rights_holder(pkg_dict)

    def after_show(self, context, pkg_dict):
        '''Fix date formats and extract holder_* extras after package_show.'''
        schema = dcatapit_schema.get_custom_package_schema()

        # quick hack on date fields that are in wrong format
        for fdef in schema:
            if fdef.get('type') != 'date':
                continue
            fname = fdef['name']
            df_value = pkg_dict.get(fname)
            if df_value:
                tmp_value = validators.parse_date(df_value, df_value)
                if isinstance(tmp_value, datetime.date):
                    try:
                        tmp_value = tmp_value.strftime(fdef.get('format') or '%d-%m-%Y')
                    # FIX: was `except ValueError, err` (Python 2 syntax),
                    # a SyntaxError on Python 3.
                    except ValueError as err:
                        log.warning(
                            "dataset %s, field %s: cannot reformat date for %s (from input %s): %s",
                            pkg_dict['name'], fname, tmp_value, df_value, err,
                            exc_info=err)
                        tmp_value = df_value
                pkg_dict[fname] = tmp_value

        # in some cases (automatic solr indexing after update)
        # pkg_dict may come without validation and thus
        # without extras converted to main dict.
        # this will ensure that holder keys are extracted to main dict
        pkg_update = {}
        to_remove = []
        for eidx, ex in enumerate(pkg_dict.get('extras') or []):
            if ex['key'].startswith('holder_'):
                to_remove.append(eidx)
                pkg_update[ex['key']] = ex['value']

        for k in pkg_update:
            if k in pkg_dict:
                raise KeyError(
                    "Duplicated key in pkg_dict: {}: {} in extras vs {} in pkg"
                    .format(k, pkg_update[k], pkg_dict[k]))

        # pop from the highest index down so remaining indices stay valid
        for tr in reversed(to_remove):
            val = pkg_dict['extras'].pop(tr)
            assert val['key'].startswith('holder_'), val
        pkg_dict.update(pkg_update)

        # NOTE: removal of holder info for local datasets was deliberately
        # disabled here; the flag is still exposed. See
        # https://github.com/geosolutions-it/ckanext-dcatapit/pull/213#issuecomment-410668740
        pkg_dict['dataset_is_local'] = helpers.dataset_is_local(pkg_dict['id'])
        return self._update_pkg_rights_holder(pkg_dict)