def __init__(self, name='dcatde_rdf'):
    '''
    Set up the harvester's collaborators and config-driven settings.

    Creates the triplestore and SHACL validator clients, loads the
    license upgrade mapping when the
    ``ckanext.dcatde.urls.dcat_licenses_upgrade_mapping`` option is set
    (an empty mapping is used otherwise) and looks up the
    ``email_validator`` validator.

    Note: ``name`` is accepted but not used inside this constructor.
    '''
    DCATRDFHarvester.__init__(self)

    self.triplestore_client = FusekiTriplestoreClient()
    self.shacl_validator_client = ShaclValidator()

    mapping_path = pylons.config.get(
        'ckanext.dcatde.urls.dcat_licenses_upgrade_mapping')
    if mapping_path:
        self.licenses_upgrade = load_json_mapping(
            mapping_path, "DCAT License upgrade mapping", LOGGER)
    else:
        self.licenses_upgrade = {}

    try:
        validator = toolkit.get_validator('email_validator')
    except UnknownValidator:
        # The validator is not registered: ``self.email_validator`` is
        # left undefined on purpose, exactly as before.
        pass
    else:
        self.email_validator = validator
def validate_config(self, source_config):
    '''
    Validate the JSON configuration string of a harvest source.

    An empty/missing configuration is returned unchanged. Otherwise the
    string is parsed as JSON and, when it contains the default catalog
    language property, that language must be one of the
    whitespace-separated locales read from the
    ``dhc.CKAN_PROP_LOCALES_OFFERED`` setting. Finally the parent
    harvester's validation is run.

    :param source_config: configuration as a JSON string (may be empty)
    :returns: the value returned by the parent validation, or
        ``source_config`` itself when the parent returns ``None``
    :raises ValueError: if the string is not valid JSON, if the
        configured language is not offered, or if the parent validation
        raises it
    '''
    if not source_config:
        return source_config

    # No try/except wrapper here: the former handler only re-raised the
    # same ValueError, so letting it propagate keeps the exception type
    # while preserving the original traceback.
    config_obj = json.loads(source_config)

    if dhc.HS_PROP_DEFULT_CATALOG_LANGUAGE in config_obj:
        default_catalog_language = config_obj.get(
            dhc.HS_PROP_DEFULT_CATALOG_LANGUAGE)
        locals_offered = pc.get(dhc.CKAN_PROP_LOCALES_OFFERED, '').split()
        if default_catalog_language not in locals_offered:
            # Interpolate explicitly; passing the list as a second
            # argument to ValueError left the '%s' placeholder unfilled.
            raise ValueError(
                'main_catalog_language must be a value of %s'
                % (locals_offered,))

    validated = DCATRDFHarvester.validate_config(self, source_config)
    # Always hand a config back to the caller, consistent with the
    # empty-config branch above.
    return source_config if validated is None else validated
def test_ckan_duplicated_name(self):
    '''
    Import two datasets that share title and name, then check that the
    first keeps the plain name while the second gets a numeric suffix.
    '''
    shared_fields = {
        'owner_org': self.org['id'],
        'holder_name': 'test holder',
        'holder_identifier': 'abcdef',
        'notes': 'some notes',
        'modified': '2000-01-01',
        'theme': 'AGRI',
        'frequency': 'UNKNOWN',
        'publisher_name': 'publisher',
        'identifier': 'aasdfa',
        'publisher_identifier': 'publisher',
        'resources': [],
        'extras': [],
    }

    def import_dataset(payload, source_name):
        # Create a harvest object for a fresh source, run the import
        # stage on the serialized payload and return the package id.
        created = self._create_harvest_obj('http://mock/source/',
                                           name=source_name,
                                           owner_org=self.org['id'])
        harvest_obj = HarvestObject.get(created['id'])
        harvest_obj.content = json.dumps(payload)
        harvester = DCATRDFHarvester()
        imported = harvester.import_stage(harvest_obj)
        self.assertTrue(imported, harvest_obj.errors)
        Session.flush()
        return harvest_obj.package_id

    first = {
        'owner_org': self.org['id'],
        'title': 'duplicated title',
        'name': 'duplicated-title',
        'id': 'dummyid'
    }
    first.update(shared_fields)
    first['_id'] = import_dataset(first, 'dupname1')

    second = {'title': 'duplicated title',
              'name': 'duplicated-title',
              'id': 'dummyid2'}
    second.update(shared_fields)
    second['identifier'] = 'otherid'
    second['_id'] = import_dataset(second, 'dupname2')

    # duplicated names are mangled, one should have numeric suffix
    expectations = (
        (first, 'duplicated-title'),
        (second, 'duplicated-title1'),
    )
    for dataset, expected_name in expectations:
        pkg_dict = helpers.call_action('package_show', context={},
                                       name_or_id=dataset['_id'])
        self.assertEqual(pkg_dict['title'], dataset['title'])
        self.assertEqual(pkg_dict['name'], expected_name)
def test_remote_orgs(self):
    '''
    Run three imports against sources configured with different
    ``remote_orgs`` values and check the holder / organization data
    found on the resulting packages.
    '''
    dataset = {
        'title': 'some title 2',
        'owner_id': self.org['id'],
        'id': 'sometitle2',
        'name': 'somename',
        'holder_name': 'test holder',
        'holder_identifier': 'abcdef',
        'notes': 'some notes',
        'modified': '2000-01-01',
        'theme': 'AGRI',
        'frequency': 'UNKNOWN',
        'publisher_name': 'publisher',
        'identifier': 'identifier2',
        'publisher_identifier': 'publisher',
    }

    def make_harvest_obj(source_url, source_name, remote_orgs):
        # Create a harvest object for the given source/config and load
        # the current state of ``dataset`` into it as JSON.
        created = self._create_harvest_obj(
            source_url,
            name=source_name,
            config=json.dumps({'remote_orgs': remote_orgs}),
            owner_org=self.org['id'],
        )
        obj = HarvestObject.get(created['id'])
        obj.content = json.dumps(dataset)
        return obj

    def show_package(name_or_id):
        return helpers.call_action('package_show', context={},
                                   name_or_id=name_or_id)

    def fetch_package_and_org():
        # Look the dataset up by name, make sure it has an owner
        # organization and that the organization's identifier equals
        # the dataset's holder identifier.
        package = show_package(dataset['name'])
        owner_id = package['owner_org']
        self.assertIsNotNone(owner_id)
        organization = helpers.call_action('organization_show',
                                           context={}, id=owner_id)
        self.assertEqual(organization['identifier'],
                         dataset['holder_identifier'])
        return package, organization

    # no org creation, holder_identifier should be assigned to dataset
    harvest_obj = make_harvest_obj('http://mock/source/a', 'testpkg_2',
                                   'no-create')
    h = DCATRDFHarvester()
    out = h.import_stage(harvest_obj)
    self.assertTrue(out, harvest_obj.errors)

    pkg = show_package('some-title-2')
    for field in ('holder_name', 'holder_identifier',):
        self.assertEqual(pkg.get(field), dataset[field])

    # check for new org
    dataset.update({
        'id': 'sometitle3',
        'name': munge_name('some title 3'),
        'title': 'some title 3',
        'holder_name': 'test test holder',
        'holder_identifier': 'abcdefg',
        'identifier': 'identifier3',
    })
    harvest_obj = make_harvest_obj('http://mock/source/b', 'testpkg_3',
                                   'create')
    out = h.import_stage(harvest_obj)
    self.assertTrue(out, harvest_obj.errors)

    # The lookup by source name is kept: it raises if nothing is found.
    show_package('testpkg_3')
    self.assertTrue(out)
    self.assertTrue(isinstance(out, bool))

    pkg, org = fetch_package_and_org()

    # package's holder should be updated with organization's data
    field_pairs = (
        ('holder_name', 'title',),
        ('holder_identifier', 'identifier',),
    )
    for pkg_field, org_field in field_pairs:
        self.assertEqual(pkg.get(pkg_field), org[org_field])

    # check for existing org
    dataset.update({
        'id': 'sometitle4',
        'name': munge_name('some title 4'),
        'title': 'some title 4',
        'identifier': 'identifier4',
    })
    harvest_obj = make_harvest_obj('http://mock/source/c', 'testpkg_4',
                                   'create')
    out = h.import_stage(harvest_obj)
    self.assertTrue(out, harvest_obj.errors)

    show_package('testpkg_4')
    self.assertTrue(isinstance(out, bool))

    fetch_package_and_org()