Example #1
    def gather_stage(self, harvest_job):
        '''
        The gather stage will receive a HarvestJob object and will be
        responsible for:
            - gathering all the necessary objects to fetch on a later
              stage (e.g. for a CSW server, perform a GetRecords request)
            - creating the necessary HarvestObjects in the database, specifying
              the guid and a reference to its source and job.
            - creating and storing any suitable HarvestGatherErrors that may
              occur.
            - returning a list with all the ids of the created HarvestObjects.

        :param harvest_job: HarvestJob object
        :returns: A list of HarvestObject ids
        '''
        self._set_config(harvest_job.source.config)
        sets = []
        harvest_objs = []
        registry = MetadataRegistry()
        registry.registerReader('oai_dc', oai_dc_reader)
        client = oaipmh.client.Client(harvest_job.source.url, registry)
        try:
            identifier = client.identify()
        except urllib2.URLError:
            self._save_gather_error('Could not gather anything from %s!' %
                                    harvest_job.source.url, harvest_job)
            return None
        domain = identifier.repositoryName()
        group = Group.by_name(domain)
        if not group:
            group = Group(name=domain, description=domain)
        query = self.config.get('query', '')
        try:
            for set_spec in client.listSets():
                identifier, name, _ = set_spec
                # an empty query matches every set name
                if query in name:
                    sets.append((identifier, name))
        except NoSetHierarchyError:
            sets.append(('1', 'Default'))
            self._save_gather_error('Could not fetch sets!', harvest_job)

        for set_id, set_name in sets:
            harvest_obj = HarvestObject(job=harvest_job)
            harvest_obj.content = json.dumps({
                'set': set_id,
                'set_name': set_name,
                'domain': domain,
            })
            harvest_obj.save()
            harvest_objs.append(harvest_obj.id)
        model.repo.commit()
        return harvest_objs
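
Each set gathered above ends up as one HarvestObject whose content field is just a small JSON document; the later fetch/import stages are expected to decode that payload before doing any work. A minimal sketch of that contract using plain json (the values are made up, the key names are the ones stored by the snippet above):

import json

# what gather_stage stores on each HarvestObject
payload = json.dumps({'set': '1', 'set_name': 'Default', 'domain': 'Example repository'})

# what a later stage would do with harvest_object.content
master_data = json.loads(payload)
assert master_data['set_name'] == 'Default'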
Example #2
 def _get_group(self, domain, in_revision=True):
     group = Group.by_name(domain)
     if not group:
         if not in_revision:
             model.repo.new_revision()
         group = Group(name=domain, description=domain)
         setup_default_user_roles(group)
         group.save()
         if not in_revision:
             model.repo.commit()
     return group
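
The helper above leans on CKAN's old revision machinery (model.repo.new_revision / commit) and setup_default_user_roles, which no longer exist in current CKAN. On newer versions the same get-or-create idea is usually expressed through the action API; a rough sketch under that assumption (ckan.plugins.toolkit available, auth deliberately bypassed via the site user):

from ckan.plugins import toolkit


def get_or_create_group(name):
    """Return the group dict for `name`, creating the group if it is missing."""
    site_user = toolkit.get_action('get_site_user')({'ignore_auth': True}, {})
    context = {'user': site_user['name'], 'ignore_auth': True}
    try:
        return toolkit.get_action('group_show')(context, {'id': name})
    except toolkit.ObjectNotFound:
        return toolkit.get_action('group_create')(
            context, {'name': name, 'title': name})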
Example #3
    def membership_request(self, org_name):
        '''Request membership for an organization'''
        if not toolkit.request.method == 'POST':
            raise toolkit.abort(400, 'Expected POST method')

        user = toolkit.c.userobj
        if not user:
            raise toolkit.NotAuthorized('Membership request requires a user')

        organization = Group.by_name(org_name)

        comment = toolkit.request.params.get('comment')
        membership_request = MembershipRequest(user, organization, comment)

        DB.add(membership_request)
        DB.commit()

        membership_request.notify_admins()

        return self.json_response({})
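
For context, the controller above only accepts a POST from a logged-in user and reads a single comment form field before storing the MembershipRequest and notifying the organization admins. A request against it might look roughly like the following; the route path and host are hypothetical, they are not shown in the snippet:

import urllib.parse
import urllib.request

data = urllib.parse.urlencode({'comment': 'Please add me to this organization'}).encode()
req = urllib.request.Request(
    'https://ckan.example.org/organization/member_request/my-org',  # hypothetical route
    data=data,
    method='POST',
)
# actually sending it would also require the session cookie of a logged-in CKAN user:
# urllib.request.urlopen(req)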
Example #4
    def membership_request(self, org_name):
        '''Request membership for an organization'''
        if not toolkit.request.method == 'POST':
            raise toolkit.abort(400, 'Expected POST method')

        user = toolkit.c.userobj
        if not user:
            raise toolkit.NotAuthorized('Membership request requires a user')

        organization = Group.by_name(org_name)

        comment = toolkit.request.params.get('comment')
        membership_request = MembershipRequest(user, organization, comment)

        DB.add(membership_request)
        DB.commit()

        membership_request.notify_admins()

        return self.json_response({})
Example #5
    def setUp(self):
        licenses = get_voc_file(LICENSES_FILE)
        load_licenses(load_graph(licenses))
        Session.flush()

        user = User.get('dummy')

        if not user:
            user = call_action('user_create',
                               name='dummy',
                               password='******',
                               email='*****@*****.**')
            user_name = user['name']
        else:
            user_name = user.name
        org = Group.by_name('dummy')
        if org:
            self.org = org.__dict__
        else:
            self.org = call_action('organization_create',
                              context={'user': user_name},
                              name='dummy',
                              identifier='aaaaaa')
Example #6
    def test_mapping(self):

        # multilang requires lang to be set
        from pylons.i18n.translation import set_lang, get_lang
        import pylons
        class dummyreq(object):
            class p(object):
                translator = object()
            environ = {'pylons.pylons': p()}
        pylons.request = dummyreq()
        pylons.translator.pylons_lang = ['en_GB']
        set_lang('en_GB')
        assert get_lang() == ['en_GB']

        assert 'dcatapit_theme_group_mapper' in config['ckan.plugins'], "No dcatapit_theme_group_mapper plugin in config"
        contents = self._get_file_contents('dataset.rdf')

        p = RDFParser(profiles=['it_dcat_ap'])

        p.parse(contents)
        datasets = [d for d in p.datasets()]
        eq_(len(datasets), 1)
        package_dict = datasets[0]


        user = User.get('dummy')
        
        if not user:
            user = call_action('user_create',
                               name='dummy',
                               password='******',
                               email='*****@*****.**')
            user_name = user['name']
        else:
            user_name = user.name
        org = Group.by_name('dummy')
        if org is None:
            org  = call_action('organization_create',
                                context={'user': user_name},
                                name='dummy',
                                identifier='aaaaaa')
        existing_g = Group.by_name('existing-group')
        if existing_g is None:
            existing_g  = call_action('group_create',
                                      context={'user': user_name},
                                      name='existing-group')

        context = {'user': '******',
                   'ignore_auth': True,
                   'defer_commit': False}
        package_schema = schema.default_create_package_schema()
        context['schema'] = package_schema
        _p = {'frequency': 'manual',
              'publisher_name': 'dummy',
              'extras': [{'key':'theme', 'value':['non-mappable', 'thememap1']}],
              'groups': [],
              'title': 'dummy',
              'holder_name': 'dummy',
              'holder_identifier': 'dummy',
              'name': 'dummy',
              'notes': 'dummy',
              'owner_org': 'dummy',
              'modified': datetime.now(),
              'publisher_identifier': 'dummy',
              'metadata_created' : datetime.now(),
              'metadata_modified': datetime.now(),
              'guid': unicode(uuid.uuid4()),
              'identifier': 'dummy'}
        
        package_dict.update(_p)
        config[DCATAPIT_THEME_TO_MAPPING_SOURCE] = ''
        package_data = call_action('package_create', context=context, **package_dict)

        p = Package.get(package_data['id'])

        # no groups should be assigned at this point (no map applied)
        assert {'theme': ['non-mappable', 'thememap1']} == p.extras, '{} vs {}'.format(_p['extras'], p.extras)
        assert [] == p.get_groups(group_type='group'), 'should be {}, got {}'.format([], p.get_groups(group_type='group'))

        package_data = call_action('package_show', context=context, id=package_data['id'])

        # use test mapping, which replaces thememap1 to thememap2 and thememap3
        test_map_file = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'examples', 'test_map.ini')
        config[DCATAPIT_THEME_TO_MAPPING_SOURCE] = test_map_file

        package_dict['theme'] = ['non-mappable', 'thememap1']

        expected_groups_existing = ['existing-group']
        expected_groups_new = expected_groups_existing + ['somegroup1', 'somegroup2']
        expected_groups_multi = expected_groups_new + ['othergroup']

        package_dict.pop('extras', None)
        p = Package.get(package_data['id'])
        context['package'] = p 

        package_data = call_action('package_update',
                                   context=context,
                                   **package_dict)
        
        #meta.Session.flush()
        #meta.Session.revision = repo.new_revision()

        # check - only existing group should be assigned
        p = Package.get(package_data['id'])
        groups = [g.name for g in p.get_groups(group_type='group')]

        assert expected_groups_existing == groups, (expected_groups_existing, 'vs', groups,)

        config[DCATAPIT_THEME_TO_MAPPING_ADD_NEW_GROUPS] = 'true'


        package_dict['theme'] = ['non-mappable', 'thememap1']
        package_data = call_action('package_update', context=context, **package_dict)


        meta.Session.flush()
        meta.Session.revision = repo.new_revision()

        # recheck - this time, new groups should appear
        p = Package.get(package_data['id'])
        groups = [g.name for g in p.get_groups(group_type='group')]

        assert len(expected_groups_new) == len(groups), (expected_groups_new, 'vs', groups,)
        assert set(expected_groups_new) == set(groups), (expected_groups_new, 'vs', groups,)

        package_dict['theme'] = ['non-mappable', 'thememap1', 'thememap-multi']
        package_data = call_action('package_update', context=context, **package_dict)

        meta.Session.flush()
        meta.Session.revision = repo.new_revision()

        # recheck - there should be no duplicates
        p = Package.get(package_data['id'])
        groups = [g.name for g in p.get_groups(group_type='group')]

        assert len(expected_groups_multi) == len(groups), (expected_groups_multi, 'vs', groups,)
        assert set(expected_groups_multi) == set(groups), (expected_groups_multi, 'vs', groups,)

        package_data = call_action('package_update', context=context, **package_dict)

        meta.Session.flush()
        meta.Session.revision = repo.new_revision()

        # recheck - there still should be no duplicates
        p = Package.get(package_data['id'])
        groups = [g.name for g in p.get_groups(group_type='group')]

        assert len(expected_groups_multi) == len(groups), (expected_groups_multi, 'vs', groups,)
        assert set(expected_groups_multi) == set(groups), (expected_groups_multi, 'vs', groups,)

        meta.Session.rollback()
Example #7
    def test_clean_tags(self):
        
        # Create source
        source_fixture = {
            'title': 'Test Source',
            'name': 'test-source',
            'url': u'http://127.0.0.1:8999/gemini2.1/dataset1.xml',
            'source_type': u'gemini-single',
            'owner_org': 'test-org',
            'metadata_created': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'metadata_modified': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),

        }

        user = User.get('dummy')
        if not user:
            user = call_action('user_create',
                               name='dummy',
                               password='******',
                               email='*****@*****.**')
            user_name = user['name']
        else:
            user_name = user.name
        org = Group.by_name('test-org')
        if org is None:
            org  = call_action('organization_create',
                                context={'user': user_name},
                                name='test-org')
        existing_g = Group.by_name('existing-group')
        if existing_g is None:
            existing_g  = call_action('group_create',
                                      context={'user': user_name},
                                      name='existing-group')

        context = {'user': '******'} 
        package_schema = default_update_package_schema()
        context['schema'] = package_schema
        package_dict = {'frequency': 'manual',
              'publisher_name': 'dummy',
              'extras': [{'key':'theme', 'value':['non-mappable', 'thememap1']}],
              'groups': [],
              'title': 'fakename',
              'holder_name': 'dummy',
              'holder_identifier': 'dummy',
              'name': 'fakename',
              'notes': 'dummy',
              'owner_org': 'test-org',
              'modified': datetime.now(),
              'publisher_identifier': 'dummy',
              'metadata_created' : datetime.now(),
              'metadata_modified' : datetime.now(),
              'guid': unicode(uuid4()),
              'identifier': 'dummy'}
        
        package_data = call_action('package_create', context=context, **package_dict)

        package = Package.get('fakename')
        source, job = self._create_source_and_job(source_fixture)
        job.package = package
        job.guid = uuid4()
        harvester = SpatialHarvester()
        with open(os.path.join('..', 'data', 'dataset.json')) as f:
            dataset = json.load(f)

        # long tags are invalid in all cases
        TAG_LONG_INVALID = 'abcdefghij' * 20
        # if clean_tags is not set to true, tags will be truncated to 50 chars
        TAG_LONG_VALID = TAG_LONG_INVALID[:50]
        # default truncate to 100
        TAG_LONG_VALID_LONG = TAG_LONG_INVALID[:100]

        assert len(TAG_LONG_VALID) == 50
        assert TAG_LONG_VALID[-1] == 'j'
        TAG_CHARS_INVALID = '[email protected]!'
        TAG_CHARS_VALID = 'pretty-invlidtag'

        dataset['tags'].append(TAG_LONG_INVALID)
        dataset['tags'].append(TAG_CHARS_INVALID)

        harvester.source_config = {'clean_tags': False}
        out = harvester.get_package_dict(dataset, job)
        tags = out['tags']

        # no tag cleaning, so the invalid characters stay in,
        # but overlong tags are still truncated (to 100 chars, per the asserts below)
        assert {'name': TAG_CHARS_VALID} not in tags
        assert {'name': TAG_CHARS_INVALID} in tags
        assert {'name': TAG_LONG_VALID_LONG} in tags
        assert {'name': TAG_LONG_INVALID} not in tags

        harvester.source_config = {'clean_tags': True}

        out = harvester.get_package_dict(dataset, job)
        tags = out['tags']
        assert {'name': TAG_CHARS_VALID} in tags
        assert {'name': TAG_LONG_VALID_LONG} in tags
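
As rough orientation for what these assertions expect: without clean_tags the tag names pass through with only a length limit applied, so the invalid characters survive; with clean_tags the disallowed characters are stripped as well. The real cleaning in the harvester is delegated to CKAN's tag munging; the standalone approximation below only mirrors the behaviour asserted above, with the 100-character limit taken from the test's own comment:

import re


def approx_tag(tag, clean_tags=False, max_length=100):
    """Standalone approximation of the asserted behaviour (not the real code)."""
    if clean_tags:
        # keep letters, digits, dashes, underscores, dots and spaces; drop the rest
        tag = re.sub(r'[^\w\-. ]', '', tag).strip().lower().replace(' ', '-')
    return tag[:max_length]


assert approx_tag('abcdefghij' * 20) == 'abcdefghij' * 10           # only truncated
assert '@' not in approx_tag('some@invalid!tag', clean_tags=True)   # chars stripped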
Example #8
    def test_clean_tags(self):
        
        # Create source
        source_fixture = {
            'title': 'Test Source',
            'name': 'test-source',
            'url': u'http://127.0.0.1:8999/gemini2.1/dataset1.xml',
            'source_type': u'gemini-single',
            'owner_org': 'test-org',
            'metadata_created': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'metadata_modified': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),

        }

        user = User.get('dummy')
        if not user:
            user = call_action('user_create',
                               name='dummy',
                               password='******',
                               email='*****@*****.**')
            user_name = user['name']
        else:
            user_name = user.name
        org = Group.by_name('test-org')
        if org is None:
            org  = call_action('organization_create',
                                context={'user': user_name},
                                name='test-org')
        existing_g = Group.by_name('existing-group')
        if existing_g is None:
            existing_g  = call_action('group_create',
                                      context={'user': user_name},
                                      name='existing-group')

        context = {'user': '******'} 
        package_schema = default_update_package_schema()
        context['schema'] = package_schema
        package_dict = {'frequency': 'manual',
              'publisher_name': 'dummy',
              'extras': [{'key':'theme', 'value':['non-mappable', 'thememap1']}],
              'groups': [],
              'title': 'fakename',
              'holder_name': 'dummy',
              'holder_identifier': 'dummy',
              'name': 'fakename',
              'notes': 'dummy',
              'owner_org': 'test-org',
              'modified': datetime.now(),
              'publisher_identifier': 'dummy',
              'metadata_created' : datetime.now(),
              'metadata_modified' : datetime.now(),
              'guid': unicode(uuid4()),
              'identifier': 'dummy'}
        
        package_data = call_action('package_create', context=context, **package_dict)

        package = Package.get('fakename')
        source, job = self._create_source_and_job(source_fixture)
        job.package = package
        job.guid = uuid4()
        harvester = SpatialHarvester()
        with open(os.path.join('..', 'data', 'dataset.json')) as f:
            dataset = json.load(f)

        # long tags are invalid in all cases
        TAG_LONG_INVALID = 'abcdefghij' * 20
        # if clean_tags is not set to true, tags will be truncated to 50 chars
        TAG_LONG_VALID = TAG_LONG_INVALID[:50]
        # default truncate to 100
        TAG_LONG_VALID_LONG = TAG_LONG_INVALID[:100]

        assert len(TAG_LONG_VALID) == 50
        assert TAG_LONG_VALID[-1] == 'j'
        TAG_CHARS_INVALID = '[email protected]!'
        TAG_CHARS_VALID = 'pretty-invlidtag'

        dataset['tags'].append(TAG_LONG_INVALID)
        dataset['tags'].append(TAG_CHARS_INVALID)

        harvester.source_config = {'clean_tags': False}
        out = harvester.get_package_dict(dataset, job)
        tags = out['tags']

        # no tag cleaning, so the invalid characters stay in,
        # but overlong tags are still truncated (to 100 chars, per the asserts below)
        assert {'name': TAG_CHARS_VALID} not in tags
        assert {'name': TAG_CHARS_INVALID} in tags
        assert {'name': TAG_LONG_VALID_LONG} in tags
        assert {'name': TAG_LONG_INVALID} not in tags

        harvester.source_config = {'clean_tags': True}

        out = harvester.get_package_dict(dataset, job)
        tags = out['tags']
        assert {'name': TAG_CHARS_VALID} in tags
        assert {'name': TAG_LONG_VALID_LONG} in tags
Example #9
 def _fetch_import_set(self, harvest_object, master_data, client, group):
     # Could be genuine fetch or retry of set insertions.
     if 'set' in master_data:
         # Fetch stage.
         args = {self.metadata_prefix_key: self.metadata_prefix_value, 'set': master_data['set']}
         if 'from_' in master_data:
             args['from_'] = self._datetime_from_str(master_data['from_'])
         if 'until' in master_data:
             args['until'] = self._datetime_from_str(master_data['until'])
         ids = []
         try:
             for identity in client.listIdentifiers(**args):
                 ids.append(identity.identifier())
         except NoRecordsMatchError:
             return False  # Ok, empty set. Nothing to do.
         except socket.error:
             errno, errstr = sys.exc_info()[:2]
             self._save_object_error(
                 'Socket error OAI-PMH %s, details:\n%s' % (errno, errstr,),
                 harvest_object, stage='Fetch')
             return False
         except httplib.BadStatusLine:
             self._save_object_error(
                 'Bad HTTP response status line.',
                 harvest_object, stage='Fetch')
             return False
         master_data['record_ids'] = ids
     else:
         log.debug('Reinsert: %s %i' % (master_data['set_name'], len(master_data['record_ids']),))
     # Do not save to DB because we can't.
     # Import stage.
     model.repo.new_revision()
     subg_name = '%s - %s' % (group.name, master_data['set_name'],)
     subgroup = Group.by_name(subg_name)
     if not subgroup:
         subgroup = Group(name=subg_name, description=subg_name)
         setup_default_user_roles(subgroup)
         subgroup.save()
     missed = []
     for ident in master_data['record_ids']:
         pkg_name = self._package_name_from_identifier(ident)
         # Package may have been omitted due to missing metadata.
         pkg = Package.get(pkg_name)
         if pkg:
             subgroup.add_package_by_name(pkg_name)
             subgroup.save()
             if 'set' not in master_data:
                 log.debug('Inserted %s into %s' % (pkg_name, subg_name,))
         else:
             # Either omitted due to missing metadata or fetch error.
             # In the latter case, we want to add record later once the
             # fetch succeeds after retry.
             missed.append(ident)
             if 'set' not in master_data:
                 log.debug('Omitted %s from %s' % (pkg_name, subg_name,))
     if len(missed):
         # Store missing names for retry.
         master_data['record_ids'] = missed
         if 'set' in master_data:
             del master_data['set']  # Omit fetch later.
         harvest_object.content = json.dumps(master_data)
         log.debug('Missed %s %i' % (master_data['set_name'], len(missed),))
     else:
         harvest_object.content = None  # Clear data.
     model.repo.commit()
     return True
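
The retry handling at the end of this method is simple bookkeeping: identifiers whose packages made it into the database are dropped, the remainder is written back to the harvest object with the set key removed (so the next run skips the fetch), and an empty remainder clears the content. A standalone sketch of that logic with plain dicts; make_name is a stand-in for _package_name_from_identifier above:

import json


def retry_content(master_data, imported_package_names, make_name):
    """Return the new harvest_object.content, or None when nothing is left to retry."""
    missed = [ident for ident in master_data['record_ids']
              if make_name(ident) not in imported_package_names]
    if not missed:
        return None                     # everything imported; clear the content
    state = dict(master_data, record_ids=missed)
    state.pop('set', None)              # omit the fetch stage on the retry
    return json.dumps(state)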
Example #10
    def import_stage(self, harvest_object):
        '''
        The import stage will receive a HarvestObject object and will be
        responsible for:
            - performing any necessary action with the fetched object (e.g.
              create a CKAN package).
              Note: if this stage creates or updates a package, a reference
              to the package must be added to the HarvestObject.
              Additionally, the HarvestObject must be flagged as current.
            - creating the HarvestObject - Package relation (if necessary)
            - creating and storing any suitable HarvestObjectErrors that may
              occur.
            - returning True if everything went as expected, False otherwise.

        :param harvest_object: HarvestObject object
        :returns: True if everything went right, False if errors were found
        '''
        model.repo.new_revision()
        master_data = json.loads(harvest_object.content)
        domain = master_data['domain']
        group = Group.get(domain)
        if not group:
            group = Group(name=domain, description=domain)
        if 'records' in master_data:
            records = master_data['records']
            set_name = master_data['set_name']
            for rec in records:
                identifier, metadata, _ = rec
                if metadata:
                    name = metadata['title'][0] if len(metadata['title'])\
                                                else identifier
                    title = name
                    norm_title = unicodedata.normalize('NFKD', name)\
                                 .encode('ASCII', 'ignore')\
                                 .lower().replace(' ', '_')[:35]
                    slug = ''.join(e for e in norm_title
                                    if e in string.ascii_letters + '_')
                    name = slug
                    creator = metadata['creator'][0]\
                                if len(metadata['creator']) else ''
                    description = metadata['description'][0]\
                                if len(metadata['description']) else ''
                    pkg = Package.by_name(name)
                    if not pkg:
                        pkg = Package(name=name, title=title)
                    extras = {}
                    for met in metadata.items():
                        key, value = met
                        if len(value) > 0:
                            if key == 'subject' or key == 'type':
                                for tag in value:
                                    if tag:
                                        tag = munge_tag(tag[:100])
                                        tag_obj = model.Tag.by_name(tag)
                                        if not tag_obj:
                                            tag_obj = model.Tag(name=tag)
                                        if tag_obj:
                                            pkgtag = model.PackageTag(
                                                                  tag=tag_obj,
                                                                  package=pkg)
                                            Session.add(tag_obj)
                                            Session.add(pkgtag)
                            else:
                                extras[key] = ' '.join(value)
                    pkg.author = creator
                    pkg.author_email = creator
                    pkg.title = title
                    pkg.notes = description
                    pkg.extras = extras
                    pkg.url = \
                    "%s?verb=GetRecord&identifier=%s&metadataPrefix=oai_dc"\
                                % (harvest_object.job.source.url, identifier)
                    pkg.save()
                    harvest_object.package_id = pkg.id
                    Session.add(harvest_object)
                    setup_default_user_roles(pkg)
                    url = ''
                    for ids in metadata['identifier']:
                        if ids.startswith('http://'):
                            url = ids
                    title = metadata['title'][0] if len(metadata['title'])\
                                                    else ''
                    description = metadata['description'][0]\
                                    if len(metadata['description']) else ''
                    pkg.add_resource(url, description=description, name=title)
                    group.add_package_by_name(pkg.name)
                    subg_name = "%s - %s" % (domain, set_name)
                    subgroup = Group.by_name(subg_name)
                    if not subgroup:
                        subgroup = Group(name=subg_name, description=subg_name)
                    subgroup.add_package_by_name(pkg.name)
                    Session.add(group)
                    Session.add(subgroup)
                    setup_default_user_roles(group)
                    setup_default_user_roles(subgroup)
            model.repo.commit()
        else:
            self._save_object_error('Could not receive any objects from fetch!',
                                    harvest_object, stage='Import')
            return False
        return True
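
The package name in the snippet above is derived from the record title by a fairly dense inline expression: NFKD-normalise, drop non-ASCII characters, lowercase, replace spaces with underscores, cut to 35 characters, and finally keep only ASCII letters and underscores. The same steps as a standalone helper, written for Python 3 (so the bytes are decoded back to text; the original operates on Python 2 strings):

import string
import unicodedata


def title_to_name(title):
    """Turn a record title into a CKAN-style package name slug."""
    norm = (unicodedata.normalize('NFKD', title)
            .encode('ASCII', 'ignore')
            .decode('ASCII')
            .lower()
            .replace(' ', '_')[:35])
    return ''.join(c for c in norm if c in string.ascii_letters + '_')


assert title_to_name('Väestö 2010, koko maa') == 'vaesto__koko_maa'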
Example #11
    def test_holder(self):
        org = {'name': 'org-test', 'title': 'Test org', 'identifier': 'abc'}

        pkg1 = {
            # 'id': '2b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset-1',
            'title': 'Dataset di test DCAT_AP-IT',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'modified': '2016-11-29',
            'identifier': str(uuid.uuid4()),
            'frequency': 'UPDATE_CONT',
            'publisher_name': 'bolzano',
            'publisher_identifier': '234234234',
            'creator_name': 'test',
            'creator_identifier': '789789789',
            'holder_name': 'bolzano',
            'holder_identifier': '234234234',
            FIELD_THEMES_AGGREGATE: themes_to_aggr_json(('ECON', )),
            'theme':
            json.dumps([theme_name_to_uri(name) for name in ('ECON', )]),
            'dataset_is_local': False,
            'language': '{DEU,ENG,ITA}',
        }

        pkg2 = {
            # 'id': 'eb6fe9ca-dc77-4cec-92a4-55c6624a5b00',
            'name': 'test-dataset-2',
            'title': 'Dataset di test DCAT_AP-IT 2',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'modified': '2016-11-29',
            'identifier': str(uuid.uuid4()),
            'frequency': 'UPDATE_CONT',
            'publisher_name': 'bolzano',
            'publisher_identifier': '234234234',
            'creator_name': 'test',
            'creator_identifier': '123123123123',
            FIELD_THEMES_AGGREGATE: themes_to_aggr_json(('ENVI', )),
            'theme':
            json.dumps([theme_name_to_uri(name) for name in ('ENVI', )]),
            'dataset_is_local': True,
            'language': '{DEU,ENG,ITA}',
            'owner_org': org['name'],
        }

        src_packages = [pkg1, pkg2]
        ctx = {'ignore_auth': True, 'user': self._get_user()['name']}

        org_loaded = Group.by_name(org['name'])
        if org_loaded:
            org_dict = org_loaded.__dict__
        else:
            org_dict = helpers.call_action('organization_create',
                                           context=ctx,
                                           **org)
        pkg1['owner_org'] = org_dict['id']
        pkg2['owner_org'] = org_dict['id']

        created_packages = [
            helpers.call_action('package_create', context=ctx, **pkg)
            for pkg in src_packages
        ]

        for pkg in created_packages:
            s = RDFSerializer()
            g = s.g
            dataset_ref = s.graph_from_dataset(pkg)
            has_identifier = False
            rights_holders = list(g.objects(dataset_ref, DCT.rightsHolder))

            assert len(rights_holders), 'There should be one rights holder for\n {}:\n {}'.\
                format(pkg, s.serialize_dataset(pkg))

            for holder_ref in rights_holders:
                _holder_names = list(g.objects(holder_ref, FOAF.name))
                _holder_ids = list(
                    (str(ob) for ob in g.objects(holder_ref, DCT.identifier)))

                # local dataset will use organization name only
                # while remote will have at least two names - one with lang, one default without lang
                if pkg['dataset_is_local']:
                    num_holder_names = 1
                else:
                    num_holder_names = 2
                assert len(_holder_names) == num_holder_names, _holder_names
                assert len(_holder_ids) == 1

                test_id = pkg.get(
                    'holder_identifier') or org_dict['identifier']
                has_identifier = _holder_ids[0] == test_id
                assert has_identifier, \
                    f'No identifier in {_holder_ids} (expected {test_id}) for\n {pkg}\n{s.serialize_dataset(pkg)}'
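
The graph shape being asserted is easier to see in isolation: the dataset node points at a rights-holder node via dct:rightsHolder, and that node carries a foaf:name plus a dct:identifier. A minimal rdflib sketch of that structure (URIs and literal values are made up for illustration):

from rdflib import BNode, Graph, Literal, Namespace, URIRef
from rdflib.namespace import FOAF

DCT = Namespace('http://purl.org/dc/terms/')

g = Graph()
dataset_ref = URIRef('http://example.org/dataset/test-dataset-1')  # made-up URI
holder = BNode()
g.add((dataset_ref, DCT.rightsHolder, holder))
g.add((holder, FOAF.name, Literal('bolzano')))
g.add((holder, DCT.identifier, Literal('234234234')))

assert [str(i) for i in g.objects(holder, DCT.identifier)] == ['234234234']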
Example #12
    def test_theme_to_group_mapping(self):
        # multilang requires lang to be set
        # class dummyreq(object):
        #     class p(object):
        #         translator = object()
        #     environ = {'pylons.pylons': p()}

        # CKANRequest(dummyreq)
        # pylons.request = dummyreq()
        # pylons.translator.pylons_lang = ['en_GB']

        #set_lang('en_GB')
        #assert get_lang() == ['en_GB']
        assert 'dcatapit_theme_group_mapper' in config[
            'ckan.plugins'], 'No dcatapit_theme_group_mapper plugin in config'

        with open(get_example_file('dataset.rdf'), 'r') as f:
            contents = f.read()

        p = RDFParser(profiles=['it_dcat_ap'])

        p.parse(contents)
        datasets = [d for d in p.datasets()]
        self.assertEqual(len(datasets), 1)
        package_dict = datasets[0]

        user = User.get('dummy')

        if not user:
            user = call_action('user_create',
                               name='dummy',
                               password='******',
                               email='*****@*****.**')
            user_name = user['name']
        else:
            user_name = user.name
        org = Group.by_name('dummy')
        if org is None:
            org = call_action('organization_create',
                              context={'user': user_name},
                              name='dummy',
                              identifier='aaaaaa')
        existing_g = Group.by_name('existing-group')
        if existing_g is None:
            existing_g = call_action('group_create',
                                     context={'user': user_name},
                                     name='existing-group')

        context = {'user': '******', 'ignore_auth': True, 'defer_commit': False}
        package_schema = schema.default_create_package_schema()
        context['schema'] = package_schema
        _p = {
            'frequency': 'manual',
            'publisher_name': 'dummy',
            'extras': [{
                'key': 'theme',
                'value': ['non-mappable', 'thememap1']
            }],
            'groups': [],  #  [{'name':existing_g.name}],
            'title': 'dummy',
            'holder_name': 'dummy',
            'holder_identifier': 'dummy',
            'name': 'dummy-' + uuid4().hex,
            'identifier': 'dummy' + uuid4().hex,
            'notes': 'dummy',
            'owner_org': 'dummy',
            'modified': datetime.now(),
            'publisher_identifier': 'dummy',
            'metadata_created': datetime.now(),
            'metadata_modified': datetime.now(),
            'guid': str(uuid.uuid4()),
        }

        package_dict.update(_p)

        config[DCATAPIT_THEME_TO_MAPPING_SOURCE] = ''
        config[DCATAPIT_THEME_TO_MAPPING_ADD_NEW_GROUPS] = 'false'

        package_data = call_action('package_create',
                                   context=context,
                                   **package_dict)

        p = Package.get(package_data['id'])

        # no groups should be assigned at this point (no map applied)
        assert {
            'theme': ['non-mappable', 'thememap1']
        } == p.extras, '{} vs {}'.format(_p['extras'], p.extras)
        assert [] == p.get_groups(
            group_type='group'), 'should be {}, got {}'.format(
                [], p.get_groups(group_type='group'))

        package_data = call_action('package_show',
                                   context=context,
                                   id=package_data['id'])

        # use test mapping, which replaces thememap1 to thememap2 and thememap3
        test_map_file = os.path.join(os.path.dirname(__file__), '..', '..',
                                     '..', 'examples', 'test_map.ini')

        config[DCATAPIT_THEME_TO_MAPPING_SOURCE] = test_map_file
        config[DCATAPIT_THEME_TO_MAPPING_ADD_NEW_GROUPS] = 'false'

        # package_dict['theme'] = ['non-mappable', 'thememap1']

        package_dict.pop('extras', None)
        p = Package.get(package_data['id'])
        context['package'] = p

        package_data = call_action('package_update',
                                   context=context,
                                   **package_dict)

        # check - only existing group should be assigned
        p = Package.get(package_data['id'])
        groups = [g.name for g in p.get_groups(group_type='group')]

        # the map file maps ECON to the existing group and to two other groups that do not exist and will not be created
        expected_groups = ['existing-group']
        self.assertSetEqual(set(expected_groups), set(groups),
                            'Error in assigned groups')

        config[DCATAPIT_THEME_TO_MAPPING_SOURCE] = test_map_file
        config[DCATAPIT_THEME_TO_MAPPING_ADD_NEW_GROUPS] = 'true'

        # package_dict['theme'] = ['non-mappable', 'thememap1']
        package_data = call_action('package_update',
                                   context=context,
                                   **package_dict)

        meta.Session.flush()

        # recheck - this time, new groups should appear
        p = Package.get(package_data['id'])
        groups = [g.name for g in p.get_groups(group_type='group')]

        # the map file maps ECON to existing group and 2 other groups that have been automatically created
        expected_groups = expected_groups + ['somegroup1', 'somegroup2']
        self.assertSetEqual(set(expected_groups), set(groups), 'Groups differ')

        # package_dict['theme'] = ['non-mappable', 'thememap1', 'thememap-multi']
        aggr = json.loads(package_dict[FIELD_THEMES_AGGREGATE])
        aggr.append({'theme': 'thememap-multi', 'subthemes': []})
        package_dict[FIELD_THEMES_AGGREGATE] = json.dumps(aggr)

        package_data = call_action('package_update',
                                   context=context,
                                   **package_dict)

        meta.Session.flush()

        # recheck - there should be no duplicates
        p = Package.get(package_data['id'])
        groups = [g.name for g in p.get_groups(group_type='group')]

        # added theme 'thememap-multi', which maps to 'othergroup' and to other already existing groups
        expected_groups = expected_groups + ['othergroup']
        self.assertEqual(len(expected_groups), len(groups),
                         'New groups differ - there may be duplicated groups')
        self.assertSetEqual(set(expected_groups), set(groups),
                            'New groups differ')

        package_data = call_action('package_update',
                                   context=context,
                                   **package_dict)

        meta.Session.flush()

        # recheck - there still should be no duplicates
        p = Package.get(package_data['id'])
        groups = [g.name for g in p.get_groups(group_type='group')]

        self.assertEqual(len(expected_groups), len(groups),
                         'New groups differ - there may be duplicated groups')
        self.assertSetEqual(set(expected_groups), set(groups),
                            'New groups differ')

        meta.Session.rollback()
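
What the repeated assertions in this test pin down is essentially set arithmetic: each theme maps to zero or more group names, mapped groups are merged with whatever the package already has, groups that do not exist yet are only added when the add-new-groups option is on, and re-running the mapping must never introduce duplicates. A standalone sketch of that core behaviour with plain data structures (the names are hypothetical and no CKAN calls are involved):

def apply_theme_mapping(themes, theme_map, current_groups,
                        existing_groups, add_new_groups=False):
    """Return the package's group list after applying a theme-to-group map."""
    result = list(dict.fromkeys(current_groups))   # keep order, drop duplicates
    for theme in themes:
        for group in theme_map.get(theme, []):     # unmapped themes are ignored
            if group in result:
                continue                           # idempotent on re-runs
            if group in existing_groups or add_new_groups:
                result.append(group)
    return result


theme_map = {'thememap1': ['existing-group', 'somegroup1', 'somegroup2']}
assert apply_theme_mapping(['non-mappable', 'thememap1'], theme_map,
                           [], {'existing-group'}) == ['existing-group']
assert apply_theme_mapping(['non-mappable', 'thememap1'], theme_map,
                           ['existing-group'], {'existing-group'},
                           add_new_groups=True) == ['existing-group',
                                                    'somegroup1', 'somegroup2']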