示例#1
0
def test_get_responsible_party():
    """Check the name and code resolved for the publisher, owner and author agents."""
    agents_config = csw_harvester_config.get('agents')

    # (agent role, expected organisation name, expected organisation code)
    expectations = (
        ('publisher',
         'Provincia Autonoma di Bolzano  - Ripartizione 28 - Natura, paesaggio e sviluppo del territorio',
         'p_bz'),
        ('owner',
         'Comune di Bolzano  - Ufficio Sistema Informativo Territoriale',
         'c_a952'),
        ('author',
         'Comune di Bolzano  - Ufficio Sistema Informativo Territoriale',
         'c_a952'),
    )

    for role, expected_name, expected_code in expectations:
        name, code = utils.get_responsible_party(responsiblePartys, agents_config.get(role))

        eq_(name, expected_name)
        eq_(code, expected_code)
示例#2
0
    def get_package_dict(self, iso_values, harvest_object):
        """Extend the parent harvester's package dict with DCAT-AP_IT fields.

        The dict returned by the parent ``get_package_dict`` gets its ``tags``
        replaced and a series of ``{'key': ..., 'value': ...}`` entries
        appended to ``package_dict['extras']``: identifier, theme, publisher,
        modified, frequency, holder, issued, geographical_name (only when a
        value is available), language, temporal_start / temporal_end (only
        when present), conforms_to and creator.

        Settings are read from ``self.source_config`` and fall back to
        ``self._dcatapit_config`` / the ``utils`` default mappings.

        :param iso_values: mapping of values parsed from the ISO record. Keys
            read here: ``guid``, ``keywords``, ``cited-responsible-party``,
            ``date-updated``, ``date-released``, ``frequency-of-update``,
            ``dataset-language``, ``temporal-extent-begin``,
            ``temporal-extent-end``, ``conformity-specification-title`` and,
            optionally, ``tags``.
        :param harvest_object: passed through to the parent implementation.
        :returns: the modified package dict.
        """
        package_dict = super(DCATAPITCSWHarvester,
                             self).get_package_dict(iso_values, harvest_object)

        frequency_mapping = self.source_config.get(
            'mapping_frequencies_to_mdr_vocabulary',
            utils._mapping_frequencies_to_mdr_vocabulary)
        language_mapping = self.source_config.get(
            'mapping_languages_to_mdr_vocabulary',
            utils._mapping_languages_to_mdr_vocabulary)

        defaults = self._dcatapit_config
        dcatapit_config = self.source_config.get('dcatapit_config', defaults)
        controlled_vocabularies = dcatapit_config.get(
            'controlled_vocabularies', defaults.get('controlled_vocabularies'))
        agents = dcatapit_config.get('agents', defaults.get('agents'))

        def add_extra(key, value):
            # Append one key/value pair to the package extras.
            package_dict['extras'].append({'key': key, 'value': value})

        def configured(key):
            # Source-level setting, falling back to the harvester default.
            return dcatapit_config.get(key, defaults.get(key))

        def join_values(values):
            # Render several values in the '{A,B}' notation used for extras.
            return '{' + ','.join(str(value) for value in values) + '}'

        def add_agent(role, name_key, identifier_key):
            # Resolve the responsible party configured for `role` and store
            # its name and code under the two given extras keys.
            agent_name, agent_code = utils.get_responsible_party(
                iso_values["cited-responsible-party"],
                agents.get(role, defaults.get('agents').get(role)))
            add_extra(name_key, agent_name)
            add_extra(identifier_key, agent_code or default_agent_code)

        # Truncate tag names to 100 characters, the limit set at DB level
        # (ckanext-spatial truncates at 50).
        tags = []
        if 'tags' in iso_values:
            tags = [{'name': tag[:100]} for tag in iso_values['tags']]

        # Add default_tags from config
        tags.extend({'name': tag}
                    for tag in (self.source_config.get('default_tags', []) or []))

        package_dict['tags'] = tags

        # ------------------------------#
        #    MANDATORY FOR DCAT-AP_IT   #
        # ------------------------------#

        #  -- identifier -- #
        identifier = iso_values["guid"]
        add_extra('identifier', identifier)

        # The part of the guid before the first ':' is used as agent code
        # whenever a responsible party yields no code of its own.
        default_agent_code = identifier.split(
            ':')[0] if ':' in identifier else None

        #  -- theme -- #
        keywords = iso_values["keywords"]
        dataset_themes = []
        if keywords:
            default_vocabularies = defaults.get('controlled_vocabularies')
            dataset_themes = utils.get_controlled_vocabulary_values(
                'eu_themes',
                controlled_vocabularies.get(
                    'dcatapit_skos_theme_id',
                    default_vocabularies.get('dcatapit_skos_theme_id')),
                keywords)

        if not dataset_themes:
            dataset_themes = configured('dataset_themes')
        elif len(dataset_themes) > 1:
            dataset_themes = join_values(set(dataset_themes))
        else:
            dataset_themes = dataset_themes[0]

        log.info("Medatata harvested dataset themes: %r", dataset_themes)
        add_extra('theme', dataset_themes)

        #  -- publisher -- #
        add_agent('publisher', 'publisher_name', 'publisher_identifier')

        #  -- modified -- #
        add_extra('modified',
                  iso_values["date-updated"] or iso_values["date-released"])

        #  -- frequency -- #
        add_extra('frequency',
                  frequency_mapping.get(iso_values["frequency-of-update"],
                                        configured('frequency')))

        #  -- rights_holder -- #
        add_agent('owner', 'holder_name', 'holder_identifier')

        # -----------------------------------------------#
        #    OTHER FIELDS NOT MANDATORY FOR DCAT_AP-IT   #
        # -----------------------------------------------#

        #  -- alternate_identifier nothing to do  -- #

        #  -- issued -- #
        add_extra('issued', iso_values["date-released"])

        #  -- geographical_name  -- #
        dataset_places = []
        if keywords:
            default_vocabularies = defaults.get('controlled_vocabularies')
            # Prefer the default *places* vocabulary id. The theme id, which
            # used to be the only fallback here (apparently copied from the
            # theme lookup), is kept as a last resort.
            default_places_vocab_id = default_vocabularies.get(
                'dcatapit_skos_places_id',
                default_vocabularies.get('dcatapit_skos_theme_id'))
            dataset_places = utils.get_controlled_vocabulary_values(
                'places',
                controlled_vocabularies.get('dcatapit_skos_places_id',
                                            default_places_vocab_id),
                keywords)

        if not dataset_places:
            dataset_places = configured('dataset_places')
        elif len(dataset_places) > 1:
            dataset_places = join_values(set(dataset_places))
        else:
            dataset_places = dataset_places[0]

        if dataset_places:
            log.info("Medatata harvested dataset places: %r", dataset_places)
            add_extra('geographical_name', dataset_places)

        #  -- geographical_geonames_url nothing to do  -- #

        #  -- language -- #
        dataset_languages = iso_values["dataset-language"]
        if dataset_languages:
            # Keep only the languages known to the mapping.
            mapped_languages = [
                mapped for mapped in
                (language_mapping.get(code) for code in dataset_languages)
                if mapped
            ]

            if len(mapped_languages) > 1:
                language = join_values(mapped_languages)
            elif mapped_languages:
                language = mapped_languages[0]
            else:
                language = configured('dataset_languages')

            log.info("Medatata harvested dataset languages: %r", language)
        else:
            # The singular key is still honoured first for backward
            # compatibility; otherwise use the same 'dataset_languages'
            # default as for records whose languages cannot be mapped.
            language = (dcatapit_config.get('dataset_language')
                        or configured('dataset_languages'))

        add_extra('language', language)

        #  -- temporal_coverage -- #
        for iso_key, extra_key in (('temporal-extent-begin', 'temporal_start'),
                                   ('temporal-extent-end', 'temporal_end')):
            extent_values = iso_values[iso_key]
            if extent_values:
                # Only the first value of each extent is stored.
                add_extra(extra_key, extent_values[0])

        #  -- conforms_to -- #
        add_extra('conforms_to', iso_values["conformity-specification-title"])

        #  -- creator -- #
        add_agent('author', 'creator_name', 'creator_identifier')

        # End of processing, return the modified package
        return package_dict
示例#3
0
    def get_package_dict(self, iso_values, harvest_object):
        """Extend the parent harvester's package dict with DCAT-AP_IT fields.

        On top of the dict returned by the parent ``get_package_dict`` this
        appends ``{'key': ..., 'value': ...}`` entries to
        ``package_dict['extras']`` (identifier, aggregated themes, publisher,
        modified, frequency, holder, issued, geographical_name, language,
        temporal_coverage, conforms_to, creator), sets ``license_id`` when a
        license can be matched or a default is configured, and finally calls
        ``interfaces.populate_resource_license``.

        Settings are read from ``self.source_config`` and fall back to
        ``self._dcatapit_config`` / the ``utils`` default mappings. As a side
        effect ``self._ckan_locales_mapping`` is (re)assigned.

        :param iso_values: mapping of values parsed from the ISO record. Keys
            read here: ``guid``, ``keywords``, ``cited-responsible-party``,
            ``date-updated``, ``date-released``, ``frequency-of-update``,
            ``dataset-language``, ``temporal-extent-begin``,
            ``temporal-extent-end``, ``conformity-specification-title``,
            ``metadata-language`` and, optionally, ``use-constraints``.
        :param harvest_object: passed through to the parent implementation.
        :returns: the modified package dict.
        """
        package_dict = super(DCATAPITCSWHarvester, self).get_package_dict(iso_values, harvest_object)

        frequency_mapping = self.source_config.get('mapping_frequencies_to_mdr_vocabulary',
                                                   utils._mapping_frequencies_to_mdr_vocabulary)
        language_mapping = self.source_config.get('mapping_languages_to_mdr_vocabulary',
                                                  utils._mapping_languages_to_mdr_vocabulary)

        self._ckan_locales_mapping = self.source_config.get('ckan_locales_mapping') or utils._ckan_locales_mapping

        default_values = self.source_config.get('default_values') or {}

        defaults = self._dcatapit_config
        dcatapit_config = self.source_config.get('dcatapit_config', defaults)
        controlled_vocabularies = dcatapit_config.get('controlled_vocabularies',
                                                      defaults.get('controlled_vocabularies'))
        agents = dcatapit_config.get('agents', defaults.get('agents'))

        def add_extra(key, value):
            # Append one key/value pair to the package extras.
            package_dict['extras'].append({'key': key, 'value': value})

        def configured(key):
            # Source-level setting, falling back to the harvester default.
            return dcatapit_config.get(key, defaults.get(key))

        def responsible_party(role):
            # (name, code) of the responsible party configured for `role`.
            return utils.get_responsible_party(iso_values['cited-responsible-party'],
                                               agents.get(role, defaults.get('agents').get(role)))

        def metadata_locale():
            # Locale of the record's metadata language, 'it' when unmapped.
            return self._ckan_locales_mapping.get(iso_values['metadata-language'], 'it').lower()

        # ------------------------------#
        #    MANDATORY FOR DCAT-AP_IT   #
        # ------------------------------#

        #  -- identifier -- #
        identifier = iso_values['guid']
        add_extra('identifier', identifier)

        # The part of the guid before the first ':' is used as agent code
        # whenever a responsible party yields no code of its own.
        default_agent_code = identifier.split(':')[0] if ':' in identifier else None

        #  -- theme -- #
        keywords = iso_values['keywords']
        dataset_themes = []
        if keywords:
            default_vocabularies = defaults.get('controlled_vocabularies')
            dataset_themes = utils.get_controlled_vocabulary_values(
                'eu_themes',
                controlled_vocabularies.get('dcatapit_skos_theme_id',
                                            default_vocabularies.get('dcatapit_skos_theme_id')),
                keywords)

        if dataset_themes:
            dataset_themes = [{'theme': str(theme), 'subthemes': []} for theme in set(dataset_themes)]
        else:
            dataset_themes = default_values.get('dataset_theme')

        # A configured default may be a plain 'A' or '{A,B}' string.
        if isinstance(dataset_themes, str):
            dataset_themes = [{'theme': dt} for dt in dataset_themes.strip('{}').split(',')]

        log.info('Medatata harvested dataset themes: %r', dataset_themes)
        add_extra(FIELD_THEMES_AGGREGATE, json.dumps(dataset_themes))

        #  -- publisher -- #
        agent_name, agent_code = responsible_party('publisher')
        add_extra('publisher_name', agent_name)
        add_extra('publisher_identifier', agent_code or default_agent_code)

        #  -- modified -- #
        add_extra('modified', iso_values['date-updated'] or iso_values['date-released'])

        #  -- frequency -- #
        add_extra('frequency',
                  frequency_mapping.get(iso_values['frequency-of-update'], configured('frequency')))

        #  -- rights_holder -- #
        agent_name, agent_code = responsible_party('owner')
        add_extra('holder_name', agent_name)
        add_extra('holder_identifier', agent_code or default_agent_code)

        # -----------------------------------------------#
        #    OTHER FIELDS NOT MANDATORY FOR DCAT_AP-IT   #
        # -----------------------------------------------#

        #  -- alternate_identifier nothing to do  -- #

        #  -- issued -- #
        add_extra('issued', iso_values['date-released'])

        #  -- geographical_name  -- #
        dataset_places = []
        if keywords:
            default_vocabularies = defaults.get('controlled_vocabularies')
            # Prefer the default *places* vocabulary id. The theme id, which
            # used to be the only fallback here (apparently copied from the
            # theme lookup), is kept as a last resort.
            default_places_vocab_id = default_vocabularies.get(
                'dcatapit_skos_places_id', default_vocabularies.get('dcatapit_skos_theme_id'))
            dataset_places = utils.get_controlled_vocabulary_values(
                'places',
                controlled_vocabularies.get('dcatapit_skos_places_id', default_places_vocab_id),
                keywords)

        if not dataset_places:
            dataset_places = configured('dataset_places')
        elif len(dataset_places) > 1:
            dataset_places = '{' + ','.join(str(place) for place in set(dataset_places)) + '}'
        else:
            dataset_places = dataset_places[0]

        if dataset_places:
            log.info('Medatata harvested dataset places: %r', dataset_places)
            add_extra('geographical_name', dataset_places)

        #  -- geographical_geonames_url nothing to do  -- #

        #  -- language -- #
        dataset_languages = iso_values['dataset-language']
        if dataset_languages:
            # Keep only the languages known to the mapping.
            mapped_languages = [
                mapped for mapped in (language_mapping.get(code) for code in dataset_languages) if mapped
            ]

            if len(mapped_languages) > 1:
                language = '{' + ','.join(str(item) for item in mapped_languages) + '}'
            elif mapped_languages:
                language = mapped_languages[0]
            else:
                language = configured('dataset_languages')

            log.info('Medatata harvested dataset languages: %r', language)
        else:
            # The singular key is still honoured first for backward
            # compatibility; otherwise use the same 'dataset_languages'
            # default as for records whose languages cannot be mapped.
            language = dcatapit_config.get('dataset_language') or configured('dataset_languages')

        add_extra('language', language)

        # temporal_coverage
        # ##################
        # Only the first begin/end value is used, and nothing is stored
        # unless a start is available.
        begin_values = iso_values['temporal-extent-begin']
        end_values = iso_values['temporal-extent-end']
        temporal_start = begin_values[0] if begin_values else None
        temporal_end = end_values[0] if end_values else None

        if temporal_start:
            temporal_coverage = [{'temporal_start': temporal_start,
                                  'temporal_end': temporal_end}]
            add_extra('temporal_coverage', json.dumps(temporal_coverage))

        # conforms_to
        # ##################
        conforms_to_identifier = iso_values['conformity-specification-title']

        # Guard on the identifier itself: the dict built below is never empty,
        # so testing the dict would emit an entry with a null identifier for
        # records that declare no conformity specification.
        if conforms_to_identifier:
            conforms_to = {'identifier': conforms_to_identifier,
                           'title': {metadata_locale(): conforms_to_identifier}}
            add_extra('conforms_to', json.dumps([conforms_to]))

        # creator
        # ###############
        agent_name, agent_code = responsible_party('author')
        agent_code = agent_code or default_agent_code
        if agent_name and agent_code:
            creator = {'creator_name': {metadata_locale(): agent_name},
                       'creator_identifier': agent_code}
            add_extra('creator', json.dumps([creator]))

        # ckan_license
        # ##################
        ckan_license = None
        use_constraints = iso_values.get('use-constraints')
        if use_constraints:
            # Only the first use constraint is matched against CKAN licenses.
            use_constraints = use_constraints[0]
            import ckan.logic.action.get as _license
            license_list = _license.license_list({'model': model, 'session': Session, 'user': '******'}, {})
            for candidate in license_list:
                candidate_id = str(candidate.get('id'))
                # Match on exact id, exact url, or the id appearing inside the
                # lower-cased constraint text.
                if (use_constraints == candidate_id
                        or use_constraints == str(candidate.get('url'))
                        or candidate_id in use_constraints.lower()):
                    ckan_license = candidate
                    break

        if ckan_license:
            package_dict['license_id'] = ckan_license.get('id')
        else:
            default_license = self.source_config.get('default_license')
            if default_license:
                package_dict['license_id'] = default_license

        #  -- license handling -- #
        interfaces.populate_resource_license(package_dict)

        # End of processing, return the modified package
        return package_dict