示例#1
0
    def __init__(self, name='dcatde_rdf'):
        '''
        Initialise the harvester: base class state, service clients, the
        optional license upgrade mapping and the e-mail validator.

        ``name`` is accepted but not used by this constructor.
        '''
        DCATRDFHarvester.__init__(self)

        # Clients for the external services this harvester talks to.
        self.triplestore_client = FusekiTriplestoreClient()
        self.shacl_validator_client = ShaclValidator()

        # The mapping is only loaded when a location is configured;
        # otherwise it stays empty.
        mapping_location = pylons.config.get(
            'ckanext.dcatde.urls.dcat_licenses_upgrade_mapping')
        if not mapping_location:
            self.licenses_upgrade = {}
        else:
            self.licenses_upgrade = load_json_mapping(
                mapping_location, "DCAT License upgrade mapping", LOGGER)

        # NOTE: when the validator is unknown, ``self.email_validator`` is
        # deliberately left unset (no attribute is created at all).
        try:
            self.email_validator = toolkit.get_validator('email_validator')
        except UnknownValidator:
            pass
示例#2
0
    def validate_config(self, source_config):
        '''
        Validate the configuration of a harvest source.

        When the configuration contains the key
        ``dhc.HS_PROP_DEFULT_CATALOG_LANGUAGE``, its value must be one of the
        whitespace-separated entries read from ``pc`` under
        ``dhc.CKAN_PROP_LOCALES_OFFERED``. Afterwards the configuration is
        handed to ``DCATRDFHarvester.validate_config``.

        :param source_config: JSON-encoded configuration; a falsy value
            (``None``, empty string) is returned untouched without any check
        :returns: ``source_config``, unmodified
        :raises ValueError: if ``source_config`` is not valid JSON or the
            configured language is not among the offered ones; anything
            raised by ``DCATRDFHarvester.validate_config`` propagates as is
        '''
        if not source_config:
            return source_config

        # json.loads signals malformed input with ValueError, which is left
        # to propagate to the caller.
        config_obj = json.loads(source_config)
        if dhc.HS_PROP_DEFULT_CATALOG_LANGUAGE in config_obj:
            default_catalog_language = config_obj.get(
                dhc.HS_PROP_DEFULT_CATALOG_LANGUAGE)
            locals_offered = pc.get(dhc.CKAN_PROP_LOCALES_OFFERED,
                                    '').split()
            if default_catalog_language not in locals_offered:
                # Interpolate explicitly: passing the list as a second
                # argument to ValueError would leave the '%s' unexpanded.
                raise ValueError(
                    'main_catalog_language must be a value of %s'
                    % (locals_offered,))

        DCATRDFHarvester.validate_config(self, source_config)

        # Return the config on success too, consistent with the early exit.
        return source_config
    def test_ckan_duplicated_name(self):
        '''
        Import two datasets that share the same title and name, and check
        that the first keeps the plain name while the second gets a numeric
        suffix.
        '''
        # Fields common to both datasets.
        shared_fields = {
            'owner_org': self.org['id'],
            'holder_name': 'test holder',
            'holder_identifier': 'abcdef',
            'notes': 'some notes',
            'modified': '2000-01-01',
            'theme': 'AGRI',
            'frequency': 'UNKNOWN',
            'publisher_name': 'publisher',
            'identifier': 'aasdfa',
            'publisher_identifier': 'publisher',
            'resources': [],
            'extras': [],
        }

        def import_dataset(payload, source_name):
            # Run ``payload`` through the import stage of a fresh harvester
            # and store the id of the resulting package under '_id'.
            serialized = json.dumps(payload)
            source = self._create_harvest_obj(
                'http://mock/source/', name=source_name, owner_org=self.org['id'])
            obj = HarvestObject.get(source['id'])
            obj.content = serialized
            succeeded = DCATRDFHarvester().import_stage(obj)
            self.assertTrue(succeeded, obj.errors)
            Session.flush()
            payload['_id'] = obj.package_id

        first = dict(
            {
                'owner_org': self.org['id'],
                'title': 'duplicated title',
                'name': 'duplicated-title',
                'id': 'dummyid',
            },
            **shared_fields
        )
        import_dataset(first, 'dupname1')

        second = dict(
            {
                'title': 'duplicated title',
                'name': 'duplicated-title',
                'id': 'dummyid2',
            },
            **shared_fields
        )
        second['identifier'] = 'otherid'
        import_dataset(second, 'dupname2')

        # duplicated names are mangled, one should have numeric suffix
        expectations = (
            (first, 'duplicated-title'),
            (second, 'duplicated-title1'),
        )
        for imported, expected_name in expectations:
            pkg_dict = helpers.call_action(
                'package_show', context={}, name_or_id=imported['_id'])
            self.assertEqual(pkg_dict['title'], imported['title'])
            self.assertEqual(pkg_dict['name'], expected_name)
    def test_remote_orgs(self):
        '''
        Exercise the ``remote_orgs`` harvest source option in three steps:
        ``no-create``, ``create`` with a new holder, and ``create`` again
        with the same holder.
        '''
        dataset = {
            'title': 'some title 2',
            'owner_id': self.org['id'],
            'id': 'sometitle2',
            'name': 'somename',
            'holder_name': 'test holder',
            'holder_identifier': 'abcdef',
            'notes': 'some notes',
            'modified': '2000-01-01',
            'theme': 'AGRI',
            'frequency': 'UNKNOWN',
            'publisher_name': 'publisher',
            'identifier': 'identifier2',
            'publisher_identifier': 'publisher',
        }

        def stage_harvest_object(source_url, source_name, remote_orgs):
            # Create a harvest object for the given source settings and load
            # the current state of ``dataset`` into it.
            source = self._create_harvest_obj(
                source_url,
                name=source_name,
                config=json.dumps({'remote_orgs': remote_orgs}),
                owner_org=self.org['id'],
            )
            obj = HarvestObject.get(source['id'])
            obj.content = json.dumps(dataset)
            return obj

        def check_owner_org():
            # Fetch the package named like the current ``dataset`` and make
            # sure its owner organization carries the holder identifier.
            package = helpers.call_action(
                'package_show', context={}, name_or_id=dataset['name'])
            owner_id = package['owner_org']
            self.assertIsNotNone(owner_id)
            owner = helpers.call_action(
                'organization_show', context={}, id=owner_id)
            self.assertEqual(owner['identifier'], dataset['holder_identifier'])
            return package, owner

        # no org creation, holder_identifier should be assigned to dataset
        harvest_obj = stage_harvest_object(
            'http://mock/source/a', 'testpkg_2', 'no-create')

        h = DCATRDFHarvester()
        out = h.import_stage(harvest_obj)
        self.assertTrue(out, harvest_obj.errors)

        pkg = helpers.call_action(
            'package_show', context={}, name_or_id='some-title-2')
        for holder_key in ('holder_name', 'holder_identifier'):
            self.assertEqual(pkg.get(holder_key), dataset[holder_key])

        # check for new org
        dataset.update({
            'id': 'sometitle3',
            'name': munge_name('some title 3'),
            'title': 'some title 3',
            'holder_name': 'test test holder',
            'holder_identifier': 'abcdefg',
            'identifier': 'identifier3',
        })
        harvest_obj = stage_harvest_object(
            'http://mock/source/b', 'testpkg_3', 'create')

        out = h.import_stage(harvest_obj)
        self.assertTrue(out, harvest_obj.errors)
        # Lookup by the harvest source name; the result is discarded.
        helpers.call_action('package_show', context={}, name_or_id='testpkg_3')
        self.assertTrue(out)
        self.assertIsInstance(out, bool)
        pkg, org = check_owner_org()

        # package's holder should be updated with organization's data
        holder_to_org = (
            ('holder_name', 'title'),
            ('holder_identifier', 'identifier'),
        )
        for pkg_key, org_key in holder_to_org:
            self.assertEqual(pkg.get(pkg_key), org[org_key])

        # check for existing org
        dataset.update({
            'id': 'sometitle4',
            'name': munge_name('some title 4'),
            'title': 'some title 4',
            'identifier': 'identifier4',
        })
        harvest_obj = stage_harvest_object(
            'http://mock/source/c', 'testpkg_4', 'create')

        out = h.import_stage(harvest_obj)
        self.assertTrue(out, harvest_obj.errors)
        # Lookup by the harvest source name; the result is discarded.
        helpers.call_action('package_show', context={}, name_or_id='testpkg_4')
        self.assertIsInstance(out, bool)
        check_owner_org()