def test_munge_name_multiple_pass(self):
    '''Munging a name multiple times produces the same result.'''
    for org, exp in self.munge_list:
        first_munge = munge_name(org)
        assert_equal(first_munge, exp)
        second_munge = munge_name(first_munge)
        assert_equal(second_munge, exp)
def test_munge_name_multiple_pass(self):
    '''Munging a name multiple times produces the same result.'''
    for org, exp in self.munge_list:
        first_munge = munge_name(org)
        nose_tools.assert_equal(first_munge, exp)
        second_munge = munge_name(first_munge)
        nose_tools.assert_equal(second_munge, exp)
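# A minimal, self-contained sketch (not taken from any of the projects above)
# of the idempotency property the two tests above exercise: munging an
# already-munged name is a no-op. check_idempotent and its sample inputs are
# hypothetical; only ckan.lib.munge.munge_name is real, and it is assumed to
# lowercase names and turn separators into dashes.
from ckan.lib.munge import munge_name


def check_idempotent(names):
    for original in names:
        once = munge_name(original)
        # a second pass over an already-munged name must not change it
        assert munge_name(once) == once


check_idempotent(['Some Title 2', 'Unit of Analysis & Measurement'])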
def make_userdict(self):
    return {
        "email": self.email,
        "name": munge.munge_name(self.name),
        "sysadmin": self.is_sysadmin(),
        "plugin_extras": {"drupal_idp": dataclasses.asdict(self)},
    }
def taxonomy_create(context, data_dict):
    """
    Creates a new taxonomy. Terms are not created here; they must be created
    using taxonomy_term_create with the taxonomy id from this call.

    :param title: the title of the taxonomy (required)
    :param uri: the uri of the taxonomy (required)
    :param name: the name of the taxonomy (optional, generated from the
        title if not provided)

    :returns: the newly created taxonomy
    :rtype: dictionary
    """
    _check_access('taxonomy_create', context, data_dict)
    model = context['model']

    name = data_dict.get('name')
    title = logic.get_or_bust(data_dict, 'title')
    uri = logic.get_or_bust(data_dict, 'uri')

    if not name:
        name = munge_name(title)

    # Check the name has not been used
    if model.Session.query(Taxonomy).filter(Taxonomy.name == name).count() > 0:
        raise logic.ValidationError("Name is already in use")

    t = Taxonomy(name=name, title=title, uri=uri)
    model.Session.add(t)
    model.Session.commit()

    return t.as_dict()
def _handle_rights_holder(self, dataset_dict, temp_dict, job):
    try:
        config = json.loads(job.source.config) if job.source.config else {}
    except (TypeError, ValueError) as err:
        log.warning('Cannot parse job config to get rights holder: %s',
                    err, exc_info=err)
        config = {}

    orgs_conf = config.get('remote_orgs', None)
    ctx = {'ignore_auth': True, 'user': self._get_user_name()}
    if orgs_conf in ('create',):
        holder_name = dataset_dict.get('holder_name', None)
        holder_identifier = dataset_dict.get('holder_identifier', None)
        if holder_identifier and holder_name:
            org = dcatapit_helpers\
                .get_organization_by_identifier(ctx, holder_identifier)
            if not org:
                org_dict = {
                    'identifier': holder_identifier,
                    'name': munge_name(holder_name),
                    'title': holder_name
                }
                act = p.toolkit.get_action('organization_create')
                org = act(context=ctx, data_dict=org_dict)
            dataset_dict['owner_org'] = org['name']
            # remove holder fields, as this info will be handled in org
            dataset_dict.pop('holder_name', None)
            dataset_dict.pop('holder_identifier', None)
def _get_group(self, base_url, group_name):
    url = base_url + self._get_rest_api_offset() + '/group/' + munge_name(group_name)
    try:
        content = self._get_content(url)
        return json.loads(content)
    except (ContentFetchError, ValueError):
        log.debug('Could not fetch/decode remote group')
        raise RemoteResourceError('Could not fetch/decode remote group')
def _get_group(self, base_url, group_name):
    url = base_url + self._get_action_api_offset() + "/group_show?id=" + \
        munge_name(group_name)
    try:
        content = self._get_content(url)
        return json.loads(content)
    except (ContentFetchError, ValueError):
        log.debug("Could not fetch/decode remote group")
        raise RemoteResourceError("Could not fetch/decode remote group")
def _get_group(self, base_url, group_name):
    url = (base_url + self._get_rest_api_offset() + "/group/" +
           munge_name(group_name))
    try:
        content = self._get_content(url)
        return json.loads(content)
    except (ContentFetchError, ValueError):
        log.debug("Could not fetch/decode remote group")
        raise RemoteResourceError("Could not fetch/decode remote group")
def _get_group(self, base_url, group_name):
    url = base_url + self._get_action_api_offset() + '/group_show?id=' + \
        munge_name(group_name)
    try:
        content = self._get_content(url)
        return json.loads(content)
    except (ContentFetchError, ValueError):
        log.debug('Could not fetch/decode remote group')
        raise RemoteResourceError('Could not fetch/decode remote group')
def improve_pkg_dict(self, pkg_dict, params, data=None):
    if pkg_dict['name'] != '':
        pkg_dict['name'] = munge_name(pkg_dict['name']).replace('_', '-')
    else:
        pkg_dict['name'] = munge_title_to_name(pkg_dict['title'])
    if pkg_dict['url'] == '':
        pkg_dict.pop('url', None)
    # override the 'id' as this never matches the CKAN internal ID
    pkg_dict['id'] = pkg_dict['name']
    if params is not None and params.get('license', None) is not None:
        pkg_dict['license_id'] = params['license']
    else:
        pkg_dict['license_id'] = config.get('ckanext.ddi.default_license')

    # TODO: move all this to an interface method in ckanext-unhcr
    # Save as draft to enable the stages on the form
    # TODO: what about updates (if overriding)
    pkg_dict['state'] = 'draft'
    if data:
        for field in ('owner_org', 'private'):
            if field in data:
                pkg_dict[field] = data[field]
    pkg_dict['archived'] = 'False'
    if pkg_dict.get('tags'):
        pkg_dict['keywords'] = [tag['name'] for tag in pkg_dict['tags']]
    if pkg_dict.get('unit_of_analysis'):
        pkg_dict['unit_of_measurement'] = pkg_dict['unit_of_analysis']
    if pkg_dict.get('data_collector'):
        pkg_dict['data_collector'] = _get_data_collector_values(
            pkg_dict['data_collector'])
    if pkg_dict.get('data_collection_technique'):
        pkg_dict['data_collection_technique'] = _get_data_collection_technique_value(
            pkg_dict['data_collection_technique'])
    if pkg_dict.get('id_number'):
        pkg_dict['original_id'] = pkg_dict['id_number']
    if pkg_dict.get('abstract'):
        pkg_dict['notes'] = pkg_dict['abstract']
    if pkg_dict.get('abbreviation'):
        pkg_dict['short_title'] = pkg_dict['abbreviation']
    pkg_dict['ddi'] = True
    return pkg_dict
def test_into_user(self, details_data):
    details = utils.Details(**details_data)
    userdict = details.make_userdict()
    assert userdict == {
        "email": details_data["email"],
        "name": munge.munge_name(details_data["name"]),
        "sysadmin": False,
        "plugin_extras": {"drupal_idp": details_data},
    }
def improve_pkg_dict(self, pkg_dict, params):
    if pkg_dict['name'] != '':
        pkg_dict['name'] = munge_name(pkg_dict['name']).replace('_', '-')
    else:
        pkg_dict['name'] = munge_title_to_name(pkg_dict['title'])
    if pkg_dict['url'] == '':
        pkg_dict.pop('url', None)
    # override the 'id' as this never matches the CKAN internal ID
    pkg_dict['id'] = pkg_dict['name']
    if params is not None and params.get('license', None) is not None:
        pkg_dict['license_id'] = params['license']
    else:
        pkg_dict['license_id'] = config.get('ckanext.ddi.default_license')
    return pkg_dict
def _create_or_update_package(self, base_name, metadata):
    pkg_name = munge_name(base_name.replace('#', '_'))
    extras_list = self._generate_extras(metadata)
    pkg_dict = {
        'name': pkg_name,
        'title': metadata.get('title', base_name),
        'notes': metadata.get('doc_excerpt', None),
        'tags': metadata.get('tags', None),
        'maintainer': metadata.get('publisher', None),
        'author': metadata.get('creator', None),
        'extras': extras_list,
        'resources': [],
    }
    pprint(pkg_dict)
    try:
        print("pkg_name: %s" % pkg_name)
        pkg = self.ckan.action.package_show(id=pkg_name)
        pkg.update(pkg_dict)
        self.ckan.call_action('package_update', pkg)
    except ckanapi.NotFound:
        pkg = self.ckan.call_action('package_create', pkg_dict)
    return pkg
class DCATAPITHarvesterPlugin(p.SingletonPlugin):
    p.implements(IDCATRDFHarvester, inherit=True)

    def before_download(self, url, harvest_job):
        return url, []

    def update_session(self, session):
        return session

    def after_download(self, content, harvest_job):
        return content, []

    def before_update(self, harvest_object, dataset_dict, temp_dict):
        self._before(dataset_dict, temp_dict, harvest_object)

    def after_update(self, harvest_object, dataset_dict, temp_dict):
        return self._after(dataset_dict, temp_dict)

    def before_create(self, harvest_object, dataset_dict, temp_dict):
        self._before_create(harvest_object, dataset_dict)
        self._before(dataset_dict, temp_dict, harvest_object)

    def after_create(self, harvest_object, dataset_dict, temp_dict):
        return self._after(dataset_dict, temp_dict)

    def _before_create(self, harvest_object, dataset_dict):
        title = dataset_dict['title']
        name = HarvesterBase._gen_new_name(title)
        if not name:
            raise Exception('Could not generate a unique name '
                            'from the title or the GUID. Please '
                            'choose a more unique title.')
        dataset_dict['name'] = name

    def _before(self, dataset_dict, temp_dict, job):
        loc_dict = dataset_dict.pop(LOCALISED_DICT_NAME_BASE, {})
        res_dict = dataset_dict.pop(LOCALISED_DICT_NAME_RESOURCES, {})
        if loc_dict or res_dict:
            temp_dict['dcatapit'] = {
                LOCALISED_DICT_NAME_BASE: loc_dict,
                LOCALISED_DICT_NAME_RESOURCES: res_dict
            }
        self._handle_rights_holder(dataset_dict, temp_dict, job)

    def _handle_rights_holder(self, dataset_dict, temp_dict, job):
        try:
            config = json.loads(job.source.config) if job.source.config else {}
        except (TypeError, ValueError) as err:
            log.warning("Cannot parse job config to get rights holder: %s",
                        err, exc_info=err)
            config = {}

        orgs_conf = config.get('remote_orgs', None)
        ctx = {'ignore_auth': True, 'user': self._get_user_name()}
        if orgs_conf in ('create',):
            holder_name = dataset_dict.get('holder_name', None)
            holder_identifier = dataset_dict.get('holder_identifier', None)
            if holder_identifier and holder_name:
                org = dcatapit_helpers\
                    .get_organization_by_identifier(ctx, holder_identifier)
                if not org:
                    org_dict = {
                        'identifier': holder_identifier,
                        'name': munge_name(holder_name),
                        'title': holder_name
                    }
                    act = p.toolkit.get_action('organization_create')
                    org = act(context=ctx, data_dict=org_dict)
                dataset_dict['owner_org'] = org['name']
                # remove holder fields, as this info will be handled in org
                dataset_dict.pop('holder_name', None)
                dataset_dict.pop('holder_identifier', None)
def test_munge_name(self):
    '''Munging a list of names gives the expected results.'''
    for org, exp in self.munge_list:
        munge = munge_name(org)
        nose_tools.assert_equal(munge, exp)
def test_munge(title, expected_munge):
    munge = munge_name(title)
    assert_equal(munge, expected_munge)
def munge_package_name(self):
    name = request.params.get('name')
    munged_name = munge.munge_name(name)
    return self._finish_ok(munged_name)
        orgs_conf = config.get('remote_orgs', None)
        ctx = {'ignore_auth': True, 'user': self._get_user_name()}
        if orgs_conf in ('create',):
            holder_name = dataset_dict.get('holder_name', None)
            holder_identifier = dataset_dict.get('holder_identifier', None)
            if holder_identifier and holder_name:
                org = dcatapit_helpers.get_organization_by_identifier(
                    ctx, holder_identifier)
                if not org:
                    org_dict = {
                        'identifier': holder_identifier,
                        'name': munge_name(holder_name),
                        'title': holder_name
                    }
                    org = p.toolkit.get_action('organization_create')(
                        context=ctx, data_dict=org_dict)
                dataset_dict['owner_org'] = org['name']
                # remove holder fields, as this info will be handled in org
                dataset_dict.pop('holder_name', None)
                dataset_dict.pop('holder_identifier', None)

    def _after(self, dataset_dict, temp_dict):
        dcatapit_dict = temp_dict.get('dcatapit')
        if not dcatapit_dict:
            return None
def test_remote_orgs(self):
    dataset = {'title': 'some title 2',
               'owner_id': self.org['id'],
               'id': 'sometitle2',
               'name': 'somename',
               'holder_name': 'test holder',
               'holder_identifier': 'abcdef',
               'notes': 'some notes',
               'modified': '2000-01-01',
               'theme': 'AGRI',
               'frequency': 'UNKNOWN',
               'publisher_name': 'publisher',
               'identifier': 'identifier2',
               'publisher_identifier': 'publisher',
               }

    # no org creation, holder_identifier should be assigned to dataset
    data = json.dumps(dataset)
    harvest_dict = self._create_harvest_obj('http://mock/source/a',
                                            name='testpkg_2',
                                            config=json.dumps({'remote_orgs': 'no-create'}),
                                            owner_org=self.org['id'],
                                            )
    harvest_obj = HarvestObject.get(harvest_dict['id'])
    harvest_obj.content = data
    h = DCATRDFHarvester()
    out = h.import_stage(harvest_obj)
    self.assertTrue(out, harvest_obj.errors)

    pkg = helpers.call_action('package_show', context={}, name_or_id='some-title-2')
    for k in ('holder_name', 'holder_identifier',):
        self.assertEqual(pkg.get(k), dataset[k])

    # check for new org
    dataset.update({'id': 'sometitle3',
                    'name': munge_name('some title 3'),
                    'title': 'some title 3',
                    'holder_name': 'test test holder',
                    'holder_identifier': 'abcdefg',
                    'identifier': 'identifier3',
                    })
    harvest_dict = self._create_harvest_obj('http://mock/source/b',
                                            name='testpkg_3',
                                            config=json.dumps({'remote_orgs': 'create'}),
                                            owner_org=self.org['id'],
                                            )
    harvest_obj = HarvestObject.get(harvest_dict['id'])
    harvest_obj.content = json.dumps(dataset)
    out = h.import_stage(harvest_obj)
    self.assertTrue(out, harvest_obj.errors)

    pkg = helpers.call_action('package_show', context={}, name_or_id='testpkg_3')
    self.assertTrue(out)
    self.assertTrue(isinstance(out, bool))

    pkg = helpers.call_action('package_show', context={}, name_or_id=dataset['name'])
    org_id = pkg['owner_org']
    self.assertIsNotNone(org_id)
    org = helpers.call_action('organization_show', context={}, id=org_id)
    self.assertEqual(org['identifier'], dataset['holder_identifier'])

    # package's holder should be updated with organization's data
    for k in (('holder_name', 'title',), ('holder_identifier', 'identifier',)):
        self.assertEqual(pkg.get(k[0]), org[k[1]])

    # check for existing org
    dataset.update({'id': 'sometitle4',
                    'name': munge_name('some title 4'),
                    'title': 'some title 4',
                    'identifier': 'identifier4',
                    })
    harvest_dict = self._create_harvest_obj('http://mock/source/c',
                                            name='testpkg_4',
                                            config=json.dumps({'remote_orgs': 'create'}),
                                            owner_org=self.org['id'],
                                            )
    harvest_obj = HarvestObject.get(harvest_dict['id'])
    harvest_obj.content = json.dumps(dataset)
    out = h.import_stage(harvest_obj)
    self.assertTrue(out, harvest_obj.errors)

    pkg = helpers.call_action('package_show', context={}, name_or_id='testpkg_4')
    self.assertTrue(isinstance(out, bool))

    pkg = helpers.call_action('package_show', context={}, name_or_id=dataset['name'])
    org_id = pkg['owner_org']
    self.assertIsNotNone(org_id)
    org = helpers.call_action('organization_show', context={}, id=org_id)
    self.assertEqual(org['identifier'], dataset['holder_identifier'])
def dataworld_name(title):
    cleaned_title = ' '.join(title.split()).replace('_', '-').replace(' ', '-')
    return munge_name('-'.join(filter(None, cleaned_title.split('-'))))
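# Rough usage sketch for dataworld_name above; the inputs and expected outputs
# are hypothetical and assume CKAN's munge_name lowercases names and keeps
# dashes. The helper collapses whitespace, turns underscores and spaces into
# dashes, and drops empty segments, so repeated separators do not leave runs
# of dashes in the final name.
print(dataworld_name('My  Data_World   Export'))    # expected: my-data-world-export
print(dataworld_name('weather__stations - 2020'))   # expected: weather-stations-2020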
def test_munge_name_pass(original, expected):
    """Munging a name multiple times produces the same result."""
    first_munge = munge_name(original)
    assert first_munge == expected
    second_munge = munge_name(first_munge)
    assert second_munge == expected
def get_package_dict(self, context, data_dict):
    package_dict = data_dict['package_dict']
    iso_values = data_dict['iso_values']
    harvest_object = data_dict['harvest_object']
    source_config = json.loads(data_dict['harvest_object'].source.config)

    xml_location_url = self._get_object_extra(data_dict['harvest_object'],
                                              'waf_location')
    xml_modified_date = self._get_object_extra(data_dict['harvest_object'],
                                               'waf_modified_date')

    # convert extras key:value list to dictionary
    extras = {x['key']: x['value'] for x in package_dict.get('extras', [])}
    extras['xml_location_url'] = xml_location_url
    if xml_modified_date:
        extras['xml_modified_date'] = xml_modified_date

    # copy some fields over from iso_values if they exist
    if iso_values.get('limitations-on-public-access'):
        extras['limitations-on-public-access'] = iso_values.get(
            'limitations-on-public-access')
    if iso_values.get('access-constraints'):
        extras['access-constraints'] = iso_values.get('access-constraints')
    if iso_values.get('use-constraints'):
        extras['use-constraints'] = iso_values.get('use-constraints')
    if iso_values.get('use-constraints-code'):
        extras['use-constraints-code'] = iso_values.get('use-constraints-code')
    if iso_values.get('legal-constraints-reference-code'):
        extras['legal-constraints-reference-code'] = iso_values.get(
            'legal-constraints-reference-code')
    if iso_values.get('distributor'):
        extras['distributor'] = iso_values.get('distributor')

    # load remote xml content
    package_dict = _extract_xml_from_harvest_object(package_dict,
                                                    harvest_object)

    # Handle Scheming, Composite, and Fluent extensions
    loaded_plugins = plugins.toolkit.config.get("ckan.plugins")
    if 'scheming_datasets' in loaded_plugins:
        # composite = 'composite' in loaded_plugins
        fluent = 'fluent' in loaded_plugins
        log.debug('#### Scheming, Composite, or Fluent extensions found, '
                  'processing dictionary ####')
        schema = plugins.toolkit.h.scheming_get_dataset_schema('dataset')

        # Package name: the default harvester uses title or guid, in that
        # order. We want to reverse that order (guid or title), and also
        # use only the English title for the name.
        title_as_name = self.from_json(package_dict.get(
            'title', '{}')).get('en', package_dict['name'])
        name = munge.munge_name(extras.get('guid', title_as_name)).lower()
        package_dict['name'] = name

        # populate license_id
        package_dict['license_id'] = iso_values.get(
            'legal-constraints-reference-code') or iso_values.get(
            'use-constraints') or 'CC-BY-4.0'

        # populate citation
        package_dict['citation'] = iso_values.get('citation')

        # populate translation method for bilingual fields
        notes_translation_method = iso_values.get('abstract_translation_method')
        title_translation_method = iso_values.get('title_translation_method')
        if notes_translation_method:
            extras['notes_translation_method'] = notes_translation_method
        if title_translation_method:
            extras['title_translation_method'] = title_translation_method

        # iterate over schema fields and update package dictionary as needed
        for field in schema['dataset_fields']:
            handled_fields = []
            self.handle_composite_harvest_dictinary(
                field, iso_values, extras, package_dict, handled_fields)
            if fluent:
                self.handle_fluent_harvest_dictinary(
                    field, iso_values, package_dict, schema, handled_fields,
                    source_config)
            self.handle_scheming_harvest_dictinary(
                field, iso_values, extras, package_dict, handled_fields)

    # populate resource format if missing
    for resource in package_dict.get('resources', []):
        if not resource.get('format'):
            if (resource.get('resource_locator_protocol').startswith('http')
                    or resource.get('url').startswith('http')):
                resource['format'] = 'text/html'

    # set default values
    package_dict['progress'] = extras.get('progress', 'onGoing')
    package_dict['frequency-of-update'] = extras.get('frequency-of-update',
                                                     'asNeeded')

    extras_as_list = []
    for key, value in extras.items():
        if package_dict.get(key, ''):
            log.error('extras %s found in package dict: key:%s value:%s',
                      key, key, value)
        if isinstance(value, (list, dict)):
            extras_as_list.append({'key': key, 'value': json.dumps(value)})
        else:
            extras_as_list.append({'key': key, 'value': value})
    package_dict['extras'] = extras_as_list

    # update resource format
    resources = package_dict.get('resources', [])
    if len(resources):
        for resource in resources:
            url = resource.get('url', '').strip()
            format = resource.get('format') or ''
            if url:
                format = self.cioos_guess_resource_format(url) or format
            resource['format'] = format
        package_dict['resources'] = resources

    return self.trim_values(package_dict)
def improve_pkg_dict(self, pkg_dict, params, data=None):
    if pkg_dict['name'] != '':
        pkg_dict['name'] = munge_name(pkg_dict['name']).replace('_', '-')
    else:
        pkg_dict['name'] = munge_title_to_name(pkg_dict['title'])
    if pkg_dict['url'] == '':
        pkg_dict.pop('url', None)
    # override the 'id' as this never matches the CKAN internal ID
    pkg_dict['id'] = pkg_dict['name']
    if params is not None and params.get('license', None) is not None:
        pkg_dict['license_id'] = params['license']
    else:
        pkg_dict['license_id'] = tk.config.get('ckanext.ddi.default_license')

    # TODO: move all this to an interface method in ckanext-unhcr
    # Save as draft to enable the stages on the form
    # TODO: what about updates (if overriding)
    pkg_dict['state'] = 'draft'
    if data:
        for field in (
            'owner_org',
            'private',
            'visibility',  # for resources
            'license_id',
            'external_access_level',
        ):
            if field in data:
                pkg_dict[field] = data[field]
    pkg_dict['archived'] = 'False'
    if pkg_dict.get('keywords'):
        pkg_dict['keywords'] = _get_keywords(pkg_dict['keywords'])
    if pkg_dict.get('unit_of_analysis'):
        pkg_dict['unit_of_measurement'] = pkg_dict['unit_of_analysis']
    if pkg_dict.get('data_collector'):
        pkg_dict['data_collector'] = _get_data_collector_values(
            pkg_dict['data_collector'])
    if pkg_dict.get('data_collection_technique'):
        pkg_dict['data_collection_technique'] = _get_data_collection_technique_value(
            pkg_dict['data_collection_technique'])
    if pkg_dict.get('id_number'):
        pkg_dict['original_id'] = pkg_dict['id_number']
    if pkg_dict.get('abstract'):
        pkg_dict['notes'] = pkg_dict['abstract']
    if pkg_dict.get('abbreviation'):
        pkg_dict['short_title'] = pkg_dict['abbreviation']
    if not pkg_dict.get('country_codes'):
        # geographies is now a required field, so make sure every dataset
        # has a "geographies" value filled in
        pkg_dict['country_codes'] = tk.config.get(
            'ckanext.ddi.default_country_code', 'UNSPECIFIED')
    pkg_dict['ddi'] = True
    return pkg_dict
def test_munge_name(self):
    '''Munging a list of names gives the expected results.'''
    for org, exp in self.munge_list:
        munge = munge_name(org)
        assert_equal(munge, exp)