Example #1
def add_to_group(key, data, errors, context):
    val = data.get(key)
    if val:
        repo.new_revision()
        grp = Group.get(val)
        grp.add_package_by_name(data[('name',)])
        grp.save()
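
All of the snippets in this listing revolve around the legacy CKAN revisioning API (removed in recent CKAN releases), where a revision must be opened before mutating versioned domain objects. A minimal sketch of that pattern, assuming standard ckan.model imports and a placeholder group name:

# Minimal sketch of the legacy pre-2.9 CKAN revisioning pattern used
# throughout these examples; 'example-group' is a placeholder name.
from ckan import model

model.repo.new_revision()                # open a revision before any write
group = model.Group.get('example-group')
if group:
    group.title = u'Updated title'
    group.save()                         # flush the change into the open revision
model.repo.commit()                      # commit the revision and the session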
Example #2
def package_show(context, data_dict):
    '''
    Return the metadata of a dataset (package) and its resources.

    Called before showing the dataset in some interface (browser, API),
    or when adding package to Solr index (no validation / conversions then).

    :param id: the id or name of the dataset
    :type id: string

    :rtype: dictionary
    '''

    if data_dict.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_show_package_schema()

    if not data_dict.get('id') and not data_dict.get('name'):
        # Get package by data PIDs
        data_dict['id'] = utils.get_package_id_by_data_pids(data_dict)

    pkg_dict1 = ckan.logic.action.get.package_show(context, data_dict)
    pkg_dict1 = utils.resource_to_dataset(pkg_dict1)

    # Remove empty agents that come from padding the agent list in converters
    if 'agent' in pkg_dict1:
        agents = filter(None, pkg_dict1.get('agent', []))
        pkg_dict1['agent'] = agents or []

    # Normally a logic function should not catch raised errors, but here the
    # NotAuthorized from check_access is handled locally so the core
    # package_show action does not catch it first; this hides sensitive
    # fields from unauthorized API calls.
    try:
        check_access('package_update', context)
    except NotAuthorized:
        pkg_dict1 = utils.hide_sensitive_fields(pkg_dict1)

    pkg = Package.get(pkg_dict1['id'])
    if 'erelated' in pkg.extras:
        erelated = pkg.extras['erelated']
        if erelated:
            for value in erelated.split(';'):
                if not Session.query(Related).filter(Related.title == value).count():
                    data_dict = {'title': value,
                                 'type': _("Paper"),
                                 'dataset_id': pkg.id}
                    related_create(context, data_dict)

    # Update package.title to match package.extras.title_0
    extras_title = pkg.extras.get(u'title_0')
    if extras_title and extras_title != pkg.title:
        repo.new_revision()
        pkg.title = pkg.extras[u'title_0']
        pkg.save()
        rebuild(pkg.id)  # Rebuild solr-index for this dataset

    return pkg_dict1
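
A plugin override like this is reached through CKAN's action dispatcher rather than called directly. A hypothetical call site, assuming a placeholder dataset name:

# Resolve the (possibly overridden) action through the plugins toolkit,
# the way API consumers and other extensions invoke it.
import ckan.plugins.toolkit as toolkit

pkg = toolkit.get_action('package_show')(
    {},                            # context; authorization is checked inside
    {'id': 'some-dataset-name'},   # placeholder id or name
)
print(pkg['title'])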
Example #3
    def initdb(self):
        kata = Group.get('KATA')
        if not kata:
            repo.new_revision()
            kata = Group(name="KATA", title="Tieteenalat")
            kata.save()
            for tiede in tieteet.tieteet:
                t = Group(description=tiede['description'],
                          name=tiede['name'],
                          title=tiede['title'])
                t.save()
                m = Member(group=kata, table_id=t.id, table_name="group")
                m.save()
        setup()
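
A quick sanity check after seeding, reusing the Group, Member, and Session names from the surrounding module: child groups hang off the parent through Member rows with table_name="group".

# Hypothetical check: list the discipline groups linked under 'KATA'.
kata = Group.get('KATA')
members = Session.query(Member).filter_by(
    group_id=kata.id, table_name='group', state='active').all()
print([m.table_id for m in members])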
Example #4
def update_organization_identifier(org_id, org_identifier):
    s = Session
    s.revision = getattr(s, 'revision', None) or repo.new_revision()
    g = s.query(Group).filter(Group.id == org_id).one()
    g.extras['identifier'] = org_identifier
    s.add(g)
    s.flush()
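
The getattr(..., None) or repo.new_revision() idiom reuses a revision already attached to the session instead of opening a second one. A hypothetical call, assuming the organization exists and using placeholder values:

# Stamp an identifier extra onto an existing organization, then persist.
org = Session.query(Group).filter(Group.name == 'my-org').one()
update_organization_identifier(org.id, 'ORG-IDENTIFIER-001')
Session.commit()   # the function only flushes; commit to persist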
Example #5
def add_to_group(key, data, errors, context):
    '''
    Add a new group if it doesn't yet exist.

    :param key: key
    :param data: data
    :param errors: validation errors
    :param context: context
    '''
    val = data.get(key)
    if val:
        repo.new_revision()
        grp = Group.get(val)
        # UI code needs group created if it does not match. Hence do so.
        if not grp:
            grp = Group(name=val, description=val, title=val)
            setup_default_user_roles(grp)
            grp.save()
        repo.commit()
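
Validators with this (key, data, errors, context) signature are attached to fields in a package schema, where CKAN invokes them with flattened data keys. A hypothetical hookup; the 'discipline' field name is made up for illustration:

# Attach add_to_group to a custom field so its group is auto-created
# during validation.
import ckan.plugins.toolkit as toolkit

def customize_package_schema(schema):
    schema['discipline'] = [toolkit.get_validator('ignore_missing'),
                            add_to_group]
    return schema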
Example #6
    def add_for_theme(cls, g, theme_ref, subtheme_ref, parent=None):
        theme = cls.normalize_theme(theme_ref)
        existing = cls.q().filter_by(uri=str(subtheme_ref)).first()
        theme_tag = ThemeToSubtheme.get_tag(theme)

        revision = getattr(Session, 'revision', None) or repo.new_revision()

        # several themes may refer to this subtheme, so just return the
        # existing instance
        if existing:
            if theme_tag not in existing.themes:
                existing.themes.append(theme_tag)
            Session.flush()
            Session.revision = revision
            log.error("Subtheme %s already exists. Skipping", subtheme_ref)
            return existing

        labels = {}
        for l in g.objects(subtheme_ref, SKOS.prefLabel):
            labels[l.language] = unicode(l)
        if not labels:
            log.error("NO labels for %s. Skipping", subtheme_ref)
            return
        version = g.value(subtheme_ref, OWL.versionInfo) or ''
        identifier = g.value(subtheme_ref, DCT.identifier) or ''
        default_label = labels[DEFAULT_LANG]
        inst = cls(version=str(version),
                   identifier=str(identifier),
                   uri=str(subtheme_ref),
                   default_label=default_label,
                   parent_id=parent.id if parent else None,
                   depth=parent.depth + 1 if parent else 0)
        inst.update_path()
        Session.add(inst)
        Session.flush()
        Session.revision = revision

        if parent is None:
            inst.parent_id = inst.id

        theme_m = ThemeToSubtheme(tag_id=theme_tag.id, subtheme_id=inst.id)
        Session.add(theme_m)

        for lang, label in labels.items():
            l = SubthemeLabel(subtheme_id=inst.id, lang=lang, label=label)
            Session.add(l)
        Session.flush()
        Session.revision = revision
        # handle children

        for child in g.objects(subtheme_ref, SKOS.hasTopConcept):
            cls.add_for_theme(g, theme_ref, child, inst)

        return inst
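
Something like the following could drive this method over a SKOS vocabulary; the file path and theme URI are placeholders, and Subtheme is assumed to be the class defining add_for_theme:

# Register every top concept of a theme as subthemes, then persist.
from rdflib import Graph, URIRef
from rdflib.namespace import SKOS

g = Graph().parse('eurovoc_subset.rdf')
theme_ref = URIRef('http://publications.europa.eu/resource/authority/data-theme/AGRI')
for subtheme_ref in g.objects(theme_ref, SKOS.hasTopConcept):
    Subtheme.add_for_theme(g, theme_ref, subtheme_ref)
Session.commit()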
Example #7
def package_show(context, data_dict):
    pkg_dict1 = ckan.logic.action.get.package_show(context, data_dict)
    pkg = Package.get(pkg_dict1['id'])
    if 'erelated' in pkg.extras:
        erelated = pkg.extras['erelated']
        if erelated:
            for value in erelated.split(';'):
                if not Session.query(Related).filter(Related.title == value).count():
                    data_dict = {'title': value,
                                 'type': _("Paper"),
                                 'dataset_id': pkg.id}
                    related_create(context, data_dict)
    # Sync package.title with the first extras key matching TITLE_MATCH
    for key in pkg.extras.keys():
        if TITLE_MATCH.match(key):
            repo.new_revision()
            pkg.title = pkg.extras[key]
            pkg_dict1['title'] = pkg.extras[key]
            pkg.save()
            break
    return pkg_dict1
Example #8
    def _make_harvest_object(self, mock_url, groups):
        source_dict = {
            'title': 'Test RDF DCAT Source',
            'name': 'test-rdf-dcat-source',
            'url': mock_url,
            'source_type': 'dcat_rdf',
            'created': datetime.now(),
            'metadata_created': datetime.now(),
        }
        default_ctx = {'ignore_auth': True,
                       'defer_commit': False}
        harvest_source = helpers.call_action('harvest_source_create',
                                             default_ctx, **source_dict)

        Session.flush()
        Session.revision = repo.new_revision()
        harvest_job = helpers.call_action('harvest_job_create',
                                          default_ctx,
                                          source_id=harvest_source['id'])

        hdata = {'groups': groups}
        Session.flush()
        Session.revision = repo.new_revision()

        harvest_object = helpers.call_action('harvest_object_create',
                                             default_ctx,
                                             job_id=harvest_job['id'])

        Session.flush()
        Session.revision = repo.new_revision()

        hobj = HarvestObject.get(harvest_object['id'])
        hobj.content = json.dumps(hdata)
        return hobj
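
A hypothetical test body feeding the prepared object into ckanext-dcat's harvester import stage:

# Build a harvest object against a mocked URL, then run the import stage.
hobj = self._make_harvest_object('http://mocked.url/dcat.rdf',
                                 groups=[{'name': 'existing-group'}])
harvester = DCATRDFHarvester()   # from ckanext.dcat.harvesters
harvester.import_stage(hobj)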
Example #9
def set_system_info(key, value):
    ''' save data in the system_info table '''
    obj = meta.Session.query(SystemInfo).filter_by(key=key).first()
    if obj and obj.value == unicode(value):
        return
    if not obj:
        obj = SystemInfo(key, value)
    else:
        obj.value = unicode(value)

    from ckan.model import repo
    rev = repo.new_revision()
    rev.message = 'Set {0} setting in system_info table'.format(key)
    meta.Session.add(obj)
    meta.Session.commit()

    return True
Example #10
def set_system_info(key, value):
    ''' save data in the system_info table '''
    obj = meta.Session.query(SystemInfo).filter_by(key=key).first()
    if obj and obj.value == text_type(value):
        return
    if not obj:
        obj = SystemInfo(key, value)
    else:
        obj.value = text_type(value)

    from ckan.model import repo
    rev = repo.new_revision()
    rev.message = 'Set {0} setting in system_info table'.format(key)
    meta.Session.add(obj)
    meta.Session.commit()

    return True
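
Example #10 differs from Example #9 only in replacing the Python 2-only unicode builtin with text_type, which keeps the function portable to Python 3; in CKAN that name comes from six:

# text_type is unicode on Python 2 and str on Python 3.
from six import text_type

assert text_type(42) == u'42'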
Example #11
    def test_mapping(self):

        # multilang requires lang to be set
        from pylons.i18n.translation import set_lang, get_lang
        import pylons
        class dummyreq(object):
            class p(object):
                translator = object()
            environ = {'pylons.pylons': p()}
        pylons.request = dummyreq()
        pylons.translator.pylons_lang = ['en_GB']
        set_lang('en_GB')
        assert get_lang() == ['en_GB']

        assert 'dcatapit_theme_group_mapper' in config['ckan.plugins'], "No dcatapit_theme_group_mapper plugin in config"
        contents = self._get_file_contents('dataset.rdf')

        p = RDFParser(profiles=['it_dcat_ap'])

        p.parse(contents)
        datasets = [d for d in p.datasets()]
        eq_(len(datasets), 1)
        package_dict = datasets[0]


        user = User.get('dummy')
        
        if not user:
            user = call_action('user_create',
                               name='dummy',
                               password='******',
                               email='*****@*****.**')
            user_name = user['name']
        else:
            user_name = user.name
        org = Group.by_name('dummy')
        if org is None:
            org = call_action('organization_create',
                              context={'user': user_name},
                              name='dummy',
                              identifier='aaaaaa')
        existing_g = Group.by_name('existing-group')
        if existing_g is None:
            existing_g = call_action('group_create',
                                     context={'user': user_name},
                                     name='existing-group')

        context = {'user': '******',
                   'ignore_auth': True,
                   'defer_commit': False}
        package_schema = schema.default_create_package_schema()
        context['schema'] = package_schema
        _p = {'frequency': 'manual',
              'publisher_name': 'dummy',
              'extras': [{'key': 'theme', 'value': ['non-mappable', 'thememap1']}],
              'groups': [],
              'title': 'dummy',
              'holder_name': 'dummy',
              'holder_identifier': 'dummy',
              'name': 'dummy',
              'notes': 'dummy',
              'owner_org': 'dummy',
              'modified': datetime.now(),
              'publisher_identifier': 'dummy',
              'metadata_created': datetime.now(),
              'metadata_modified': datetime.now(),
              'guid': unicode(uuid.uuid4()),
              'identifier': 'dummy'}
        
        package_dict.update(_p)
        config[DCATAPIT_THEME_TO_MAPPING_SOURCE] = ''
        package_data = call_action('package_create', context=context, **package_dict)

        p = Package.get(package_data['id'])

        # no groups should be assigned at this point (no map applied)
        assert {'theme': ['non-mappable', 'thememap1']} == p.extras, '{} vs {}'.format(_p['extras'], p.extras)
        assert [] == p.get_groups(group_type='group'), 'should be {}, got {}'.format([], p.get_groups(group_type='group'))

        package_data = call_action('package_show', context=context, id=package_data['id'])

        # use the test mapping, which maps thememap1 to thememap2 and thememap3
        test_map_file = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'examples', 'test_map.ini')
        config[DCATAPIT_THEME_TO_MAPPING_SOURCE] = test_map_file

        package_dict['theme'] = ['non-mappable', 'thememap1']

        expected_groups_existing = ['existing-group']
        expected_groups_new = expected_groups_existing + ['somegroup1', 'somegroup2']
        expected_groups_multi = expected_groups_new + ['othergroup']

        package_dict.pop('extras', None)
        p = Package.get(package_data['id'])
        context['package'] = p 

        package_data = call_action('package_update',
                                   context=context,
                                   **package_dict)
        
        #meta.Session.flush()
        #meta.Session.revision = repo.new_revision()

        # check - only existing group should be assigned
        p = Package.get(package_data['id'])
        groups = [g.name for g in p.get_groups(group_type='group')]

        assert expected_groups_existing == groups, (expected_groups_existing, 'vs', groups,)

        config[DCATAPIT_THEME_TO_MAPPING_ADD_NEW_GROUPS] = 'true'


        package_dict['theme'] = ['non-mappable', 'thememap1']
        package_data = call_action('package_update', context=context, **package_dict)


        meta.Session.flush()
        meta.Session.revision = repo.new_revision()

        # recheck - this time, new groups should appear
        p = Package.get(package_data['id'])
        groups = [g.name for g in p.get_groups(group_type='group')]

        assert len(expected_groups_new) == len(groups), (expected_groups_new, 'vs', groups,)
        assert set(expected_groups_new) == set(groups), (expected_groups_new, 'vs', groups,)

        package_dict['theme'] = ['non-mappable', 'thememap1', 'thememap-multi']
        package_data = call_action('package_update', context=context, **package_dict)

        meta.Session.flush()
        meta.Session.revision = repo.new_revision()

        # recheck - there should be no duplicates
        p = Package.get(package_data['id'])
        groups = [g.name for g in p.get_groups(group_type='group')]

        assert len(expected_groups_multi) == len(groups), (expected_groups_multi, 'vs', groups,)
        assert set(expected_groups_multi) == set(groups), (expected_groups_multi, 'vs', groups,)

        package_data = call_action('package_update', context=context, **package_dict)

        meta.Session.flush()
        meta.Session.revision = repo.new_revision()

        # recheck - there still should be no duplicates
        p = Package.get(package_data['id'])
        groups = [g.name for g in p.get_groups(group_type='group')]

        assert len(expected_groups_multi) == len(groups), (expected_groups_multi, 'vs', groups,)
        assert set(expected_groups_multi) == set(groups), (expected_groups_multi, 'vs', groups,)

        meta.Session.rollback()
Example #12
    def test_graph_from_dataset(self):

        conforms_to_in = [{'identifier': 'CONF1',
                           'uri': 'conf01',
                           'title': {'en': 'title', 'it': 'title'},
                           'referenceDocumentation': ['http://abc.efg/']},
                          {'identifier': 'CONF2',
                           'title': {'en': 'title', 'it': 'title'},
                           'description': {'en': 'descen', 'it': 'descit'},
                           'referenceDocumentation': ['http://abc.efg/']},
                          ]

        alternate_identifiers = [{'identifier': 'aaaabc',
                                 'agent': {'agent_identifier': 'agent01',
                                           'agent_name': {'en': 'Agent en 01', 'it': 'Agent it 01'}},
                                 },
                                 {'identifier': 'other identifier', 'agent': {}}]
        creators = [{'creator_name': {'en': 'abc'}, 'creator_identifier': "ABC"},
                    {'creator_name': {'en': 'cde'}, 'creator_identifier': "CDE"},
                    ]

        temporal_coverage = [{'temporal_start': '2001-01-01', 'temporal_end': '2001-02-01 10:11:12'},
                             {'temporal_start': '2001-01-01', 'temporal_end': '2001-02-01 11:12:13'},
                            ]

        subthemes = [{'theme': 'AGRI', 'subthemes': ['http://eurovoc.europa.eu/100253',
                                                     'http://eurovoc.europa.eu/100258']},
                     {'theme': 'ENVI', 'subthemes': []}]

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Dataset di test DCAT_AP-IT',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
            'issued': '2016-11-29',
            'modified': '2016-11-29',
            'identifier': 'ISBN',
            'temporal_start': '2016-11-01',
            'temporal_end': '2016-11-30',
            'frequency': 'UPDATE_CONT',
            'publisher_name': 'bolzano',
            'publisher_identifier': '234234234',
            'creator_name': 'test',
            'creator_identifier': '412946129',
            'holder_name': 'bolzano',
            'holder_identifier': '234234234',
            'alternate_identifier': json.dumps(alternate_identifiers),
            'temporal_coverage': json.dumps(temporal_coverage),
            # 'theme': 'ECON',
            'geographical_geonames_url': 'http://www.geonames.org/3181913',
            'language': '{DEU,ENG,ITA}',
            'is_version_of': 'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
            'conforms_to': json.dumps(conforms_to_in),
            'creator': json.dumps(creators),
            'theme': json.dumps(subthemes),
        }
        
        pkg_id = dataset['id']
        
        pub_names = {'it': 'IT publisher',
                     'es': 'EN publisher'}
        holder_names = {'it': 'IT holder name',
                        'es': 'EN holder name'}

        multilang_fields = [('publisher_name', 'package', k, v) for k, v in pub_names.items()] +\
                           [('holder_name', 'package', k, v) for k, v in holder_names.items()]
        
        pkg = helpers.call_action('package_create', {'defer_commit': True}, **dataset)
        rev = getattr(Session, 'revision', None) or repo.new_revision()
        Session.flush()
        Session.revision = rev
        pkg_id = pkg['id']

        for field_name, field_type, lang, text in multilang_fields:
            interfaces.upsert_package_multilang(pkg_id, field_name, field_type, lang, text)

        loc_dict = interfaces.get_for_package(pkg_id)
        #assert loc_dict['publisher_name'] == pub_names
        #assert loc_dict['holder_name'] == holder_names


        # temporary bug for compatibility with interfaces.get_language(),
        # which will return lang[0]
        pub_names.update({DEFAULT_LANG: dataset['publisher_name']})
        # pub_names.update({DEFAULT_LANG[0]: dataset['publisher_name']})
        holder_names.update({DEFAULT_LANG: dataset['holder_name']})
        # holder_names.update({DEFAULT_LANG[0]: dataset['holder_name']})
        
        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        eq_(unicode(dataset_ref), utils.dataset_uri(dataset))

        # Basic fields
        assert self._triple(g, dataset_ref, RDF.type, DCATAPIT.Dataset)
        assert self._triple(g, dataset_ref, DCT.title, dataset['title'])
        assert self._triple(g, dataset_ref, DCT.description, dataset['notes'])

        assert self._triple(g, dataset_ref, DCT.identifier, dataset['identifier'])

        # Tags
        eq_(len([t for t in g.triples((dataset_ref, DCAT.keyword, None))]), 2)
        for tag in dataset['tags']:
            assert self._triple(g, dataset_ref, DCAT.keyword, tag['name'])
        
        # conformsTo
        conforms_to = list(g.triples((None, DCT.conformsTo, None)))
        assert conforms_to

        conforms_to_dict = dict((d['identifier'], d) for d in conforms_to_in)
        for conf in conforms_to:
            conf_id = conf[-1]

            identifier = g.value(conf_id, DCT.identifier)
            titles = list(g.objects(conf_id, DCT.title))
            descs = list(g.objects(conf_id, DCT.description))
            references = list(g.objects(conf_id, DCATAPIT.referenceDocumentation))
            
            check = conforms_to_dict.get(str(identifier))
            
            assert isinstance(check, dict)

            if check.get('uri'):
                assert check['uri'] == str(conf_id)
            assert len(titles), "missing titles"
            
            assert (len(descs) > 0) == bool(check.get('description')), "missing descriptions"

            for title in titles:
                tlang = title.language
                tval = str(title)
                assert tval == check['title'][tlang], (tlang, tval, check['title'])

            for desc in descs:
                tlang = desc.language
                tval = str(desc)
                assert tval == check['description'][tlang], (tlang, str(tval), check['description'])
            
            ref_docs = check.get('referenceDocumentation')
            assert len(references) == len(ref_docs), "missing reference documentation"
            
            for dref in references:
                assert str(dref) in ref_docs, "{} not in {}".format(dref, ref_docs)
                                                                
            for ref in ref_docs:
                assert URIRef(ref) in references

        # alternate identifiers
        alt_ids = [a[-1] for a in g.triples((None, ADMS.identifier, None))]
        alt_ids_dict = dict((a['identifier'], a) for a in alternate_identifiers)

        for alt_id in alt_ids:
            identifier = g.value(alt_id, SKOS.notation)
            check = alt_ids_dict[str(identifier)]
            assert str(identifier) == check['identifier']
            if check.get('agent'):
                agent_ref = g.value(alt_id, DCT.creator)
                assert agent_ref is not None

                agent_identifier = g.value(agent_ref, DCT.identifier)

                agent_name = dict((v.language, str(v)) for v in g.objects(agent_ref, FOAF.name))
                
                assert set(agent_name.items()) == set(check['agent']['agent_name'].items()),\
                    "expected {}, got {} for {}".format(check['agent']['agent_name'], agent_name, agent_ref)

                assert str(agent_identifier) == check['agent']['agent_identifier'],\
                    "expected {}, got {}".format(check['agent']['agent_identifier'], agent_identifier)
        # creators
        creators.append({'creator_name': {'en': 'test'},
                         'creator_identifier': '412946129'})
        creators_in = list(g.objects(dataset_ref, DCT.creator))
        assert len(creators) == len(creators_in)

        for cref in creators_in:
            cnames = dict((str(c.language) if c.language else DEFAULT_LANG, str(c)) for c in g.objects(cref, FOAF.name))
            c_identifier = g.value(cref, DCT.identifier)
            c_dict = {'creator_name': cnames,
                      'creator_identifier': str(c_identifier)}
            assert c_dict in creators, "no {} in {}".format(c_dict, creators)

        # temporal coverage
        temporal_coverage.append({'temporal_start': dataset['temporal_start'],
                                  'temporal_end': dataset['temporal_end']})
        temp_exts = list(g.triples((dataset_ref, DCT.temporal, None)))
        assert len(temp_exts) == len(temporal_coverage)
        
        # normalize values
        for item in temporal_coverage:
            for k, v in item.items():
                item[k] = pdate(v)

        temp_ext = []
        for interval_t in temp_exts:
            interval = interval_t[-1]
            start = g.value(interval, SCHEMA.startDate)
            end = g.value(interval, SCHEMA.endDate)
            assert start is not None
            assert end is not None
            temp_ext.append({'temporal_start': pdate(str(start)),
                             'temporal_end': pdate(str(end))})

        set1 = set([tuple(d.items()) for d in temp_ext])
        set2 = set([tuple(d.items()) for d in temporal_coverage])
        assert set1 == set2, "Got different temporal coverage sets: \n{}\n vs\n {}".format(set1, set2)

        for pub_ref in g.objects(dataset_ref, DCT.publisher):
            _pub_names = list(g.objects(pub_ref, FOAF.name))

            assert len(_pub_names) 

            for pub_name in _pub_names:
                if pub_name.language:
                    assert str(pub_name.language) in pub_names, "no {} in {}".format(pub_name.language, pub_names)
                    assert pub_names[str(pub_name.language)] == str(pub_name), "{} vs {}".format(pub_name, pub_names)

        for holder_ref in g.objects(dataset_ref, DCT.rightsHolder):
            _holder_names = list(g.objects(holder_ref, FOAF.name))

            assert len(_holder_names) 

            for holder_name in _holder_names:
                if holder_name.language:
                    assert str(holder_name.language) in holder_names, "no {} in {}".format(holder_name.language, holder_names)
                    assert holder_names[str(holder_name.language)] == str(holder_name), "{} vs {}".format(holder_name, holder_names)