示例#1
0
    def test_subthemes(self):

        load_themes()

        subthemes = [{
            'theme':
            'AGRI',
            'subthemes': [
                'http://eurovoc.europa.eu/100253',
                'http://eurovoc.europa.eu/100258'
            ]
        }, {
            'theme': 'ENVI',
            'subthemes': []
        }]

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Dataset di test DCAT_AP-IT',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'tags': [{
                'name': 'Tag 1'
            }, {
                'name': 'Tag 2'
            }],
            'issued': '2016-11-29',
            'modified': '2016-11-29',
            'frequency': 'UPDATE_CONT',
            'publisher_name': 'bolzano',
            'publisher_identifier': '234234234',
            'creator_name': 'test',
            'creator_identifier': '412946129',
            'holder_name': 'bolzano',
            'holder_identifier': '234234234',
            'alternate_identifier': 'ISBN,TEST',
            'theme': json.dumps(subthemes),
        }

        s = RDFSerializer()
        p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])

        serialized = s.serialize_dataset(dataset)

        p.parse(serialized)
        datasets = list(p.datasets())

        assert len(datasets) == 1
        d = datasets[0]
        themes = json.loads(dataset['theme'])
        assert (len(themes) == len(subthemes) == 2)
        for t in themes:
            if t['theme'] == 'ENVI':
                assert t['subthemes'] == []
            elif t['theme'] == 'AGRI':
                assert set(t['subthemes']) == set(subthemes[0]['subthemes'])
            else:
                assert False, "Unknown theme: {}".format(t)
    def test_creators(self):

        creators = [{'creator_name': {DEFAULT_LANG: 'abc', 'it': 'abc it'}, 'creator_identifier': "ABC"},
                    {'creator_name': {DEFAULT_LANG: 'cde', 'it': 'cde it'}, 'creator_identifier': "CDE"},
                    ]
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Dataset di test DCAT_AP-IT',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
            'issued':'2016-11-29',
            'modified':'2016-11-29',
            'identifier':'ISBN',
            'temporal_start':'2016-11-01',
            'temporal_end':'2016-11-30',
            'frequency':'UPDATE_CONT',
            'publisher_name':'bolzano',
            'publisher_identifier':'234234234',
            'creator_name':'test',
            'creator_identifier':'412946129',
            'holder_name':'bolzano',
            'holder_identifier':'234234234',
            'alternate_identifier':'ISBN,TEST',
            'theme':'{ECON,ENVI}',
            'geographical_geonames_url':'http://www.geonames.org/3181913',
            'language':'{DEU,ENG,ITA}',
            'is_version_of':'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
            'creator': json.dumps(creators)
        }

        s = RDFSerializer()
        p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])
        
        serialized = s.serialize_dataset(dataset)

        p.parse(serialized)
        datasets = list(p.datasets())
        
        assert len(datasets) == 1
        d = datasets[0]
        creators.append({'creator_identifier': dataset['creator_identifier'],
                              'creator_name': {DEFAULT_LANG: dataset['creator_name']}})

        creators_dict = dict((v['creator_identifier'], v) for v in creators)

        creators_in = json.loads(d['creator'])

        for c in creators_in:
            assert c['creator_identifier'] in creators_dict.keys(), "no {} key in {}".format(c['creator_identifier'],
                                                                                             creators_dict.keys())
            assert c['creator_name'] == creators_dict[c['creator_identifier']]['creator_name'],\
                "{} vs {}".format(c['creator_name'], creators_dict[c['creator_identifier']]['creator_name'])
        for c in creators_dict.keys():
            assert c in [_c['creator_identifier'] for _c in creators_in]
            cdata = creators_dict[c]
            assert cdata in creators_in
示例#3
0
def export_package_to_rdf(package_dict, _format='xml'):
    """Exports a package metadata in RDF in the specified format.

    :param dict package_dict: the package metadata.
    :param str _format: the desired format to export to. Default is ``xml``.
    """
    serializer = RDFSerializer()
    return serializer.serialize_dataset(package_dict, _format=_format)
    def test_temporal_coverage(self):

        load_themes()
        temporal_coverage = [{'temporal_start': '2001-01-01T00:00:00', 'temporal_end': '2001-02-01T10:11:12'},
                             {'temporal_start': '2001-01-01T00:00:00', 'temporal_end': '2001-02-01T10:11:12'},
                            ]
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Dataset di test DCAT_AP-IT',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
            'issued':'2016-11-29',
            'modified':'2016-11-29',
            'identifier':'ISBN',
            'temporal_start':'2016-11-01T00:00:00',
            'temporal_end':'2016-11-30T00:00:00',
            'temporal_coverage': json.dumps(temporal_coverage),
            'frequency':'UPDATE_CONT',
            'publisher_name':'bolzano',
            'publisher_identifier':'234234234',
            'creator_name':'test',
            'creator_identifier':'412946129',
            'holder_name':'bolzano',
            'holder_identifier':'234234234',
            'alternate_identifier':'ISBN,TEST',
            'theme':'{ECON,ENVI}',
            'geographical_geonames_url':'http://www.geonames.org/3181913',
            'language':'{DEU,ENG,ITA}',
            'is_version_of':'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
        }

        s = RDFSerializer()
        p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])
        
        serialized = s.serialize_dataset(dataset)

        p.parse(serialized)
        datasets = list(p.datasets())
        
        assert len(datasets) == 1
        d = datasets[0]

        temporal_coverage.append({'temporal_start': dataset['temporal_start'],
                                  'temporal_end': dataset['temporal_end']})

        try:
            validators.dcatapit_temporal_coverage(d['temporal_coverage'], {})
            # this should not raise exception
            assert True
        except validators.Invalid, err:
            assert False, "Temporal coverage should be valid: {}".format(err)
示例#5
0
def dcat_dataset_show(context, data_dict):

    toolkit.check_access('dcat_dataset_show', context, data_dict)

    dataset_dict = toolkit.get_action('package_show')(context, data_dict)
    serializer = RDFSerializer()

    output = serializer.serialize_dataset(dataset_dict,
                                          _format=data_dict.get('format'))

    return output
示例#6
0
def dcat_dataset_show(context, data_dict):

    toolkit.check_access('dcat_dataset_show', context, data_dict)

    dataset_dict = toolkit.get_action('package_show')(context, data_dict)

    serializer = RDFSerializer(profiles=data_dict.get('profiles'))

    output = serializer.serialize_dataset(dataset_dict,
                                          _format=data_dict.get('format'))

    return output
示例#7
0
def dcat_markup_dataset_show(context, data_dict):

    p.toolkit.check_access('dcat_dataset_show', context, data_dict)

    dataset_dict = p.toolkit.get_action('package_show')(context, data_dict)
    #print dataset_dict
    dataset_dict['notes'] = dataset_dict['title']
    #print dataset_dict

    serializer = RDFSerializer(profiles=data_dict.get('profiles'))

    output = serializer.serialize_dataset(dataset_dict,
                                          _format=data_dict.get('format'))

    return output
    def test_conforms_to(self):

        conforms_to_in = [{'identifier': 'CONF1',
                                       'uri': 'http://conf01/abc',
                                 'title': {'en': 'title', 'it': 'title'},
                                 'referenceDocumentation': ['http://abc.efg/'],},
                                {'identifier': 'CONF2',
                                 'title': {'en': 'title', 'it': 'title'},
                                 'description': {'en': 'descen', 'it': 'descit'},
                                 'referenceDocumentation': ['http://abc.efg/'],},
                                 ]
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Dataset di test DCAT_AP-IT',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
            'issued':'2016-11-29',
            'modified':'2016-11-29',
            'identifier':'ISBN',
            'temporal_start':'2016-11-01',
            'temporal_end':'2016-11-30',
            'frequency':'UPDATE_CONT',
            'publisher_name':'bolzano',
            'publisher_identifier':'234234234',
            'creator_name':'test',
            'creator_identifier':'412946129',
            'holder_name':'bolzano',
            'holder_identifier':'234234234',
            'alternate_identifier':'ISBN,TEST',
            'theme':'{ECON,ENVI}',
            'geographical_geonames_url':'http://www.geonames.org/3181913',
            'language':'{DEU,ENG,ITA}',
            'is_version_of':'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
            'conforms_to':json.dumps(conforms_to_in)
        }

        s = RDFSerializer()
        p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])
        
        serialized = s.serialize_dataset(dataset)

        p.parse(serialized)
        datasets = list(p.datasets())
        
        assert len(datasets) == 1
        d = datasets[0]

        conforms_to = dict((d['identifier'], d) for d in conforms_to_in)
        dataset_conforms_to = json.loads(d['conforms_to'])

        assert len(dataset_conforms_to) == len(conforms_to_in), "got {}, should be {}".format(len(d['conforms_to']), len(conforms_to_in))
        for conf in dataset_conforms_to:
            check = conforms_to[conf['identifier']]
            for k,v  in check.items():
                # there should be no empty uri 
                if k == 'uri' and not v:
                    assert conf.get(k) is None
                else:
                    assert conf.get(k) == v
            for k, v in conf.items():
                src_v = check.get(k)
                # ref may be extracted from rdf, but it can be
                # generated by serializer
                if not src_v and k == 'uri':
                    continue
                # no value, may be missing key in source
                elif not src_v:
                    assert not check.get(k)
                else:
                    assert check[k] == v
    def test_license(self):
        
        def get_path(fname):
            return os.path.join(os.path.dirname(__file__),
                        '..', '..', '..', 'examples', fname)
        licenses = get_path('licenses.rdf')
        load_from_graph(path=licenses)
        Session.flush()


        dataset = {'title': 'some title',
                   'id': 'sometitle',
                   'resources': [
                            {
                                'id': 'resource/1111',
                                'uri': 'http://resource/1111',
                                'license_type': 'invalid',
                            },
                            {
                                'id': 'resource/2222',
                                'uri': 'http://resource/2222',
                                'license_type': 'https://w3id.org/italia/controlled-vocabulary/licences/A311_GFDL13'
                            }
                        ]
                    }
       

        p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])

        s = RDFSerializer()


        dataset_ref = s.graph_from_dataset(dataset)

        g = s.g

        r1 = URIRef(dataset['resources'][0]['uri'])
        r2 = URIRef(dataset['resources'][1]['uri'])

        unknown = License.get(License.DEFAULT_LICENSE)

        license_ref = g.value(r1, DCT.license)
        
        assert license_ref is not None
        assert str(license_ref) == unknown.uri,\
            "got license {}, instead of {}".format(license_ref, unknown.license_type)

        gpl = License.get(dataset['resources'][1]['license_type'])
        assert gpl is not None

        license_ref = g.value(r2, DCT.license)
        license_type = g.value(license_ref, DCT.type)
        
        assert license_ref is not None

        assert str(license_ref) == gpl.document_uri
        assert str(license_type) == gpl.license_type

        serialized = s.serialize_dataset(dataset)

        p.parse(serialized)
        datasets = list(p.datasets())
        assert len(datasets) == 1
        new_dataset = datasets[0]
        resources = new_dataset['resources']

        def _find_res(res_uri):
            for res in resources:
                if res_uri == res['uri']:
                    return res
            raise ValueError("No resource for {}".format(res_uri))

        new_res_unknown = _find_res(str(r1))
        new_res_gpl = _find_res(str(r2))

        assert new_res_unknown['license_type'] == unknown.uri, (new_res_unknown['license_type'], unknown.uri,)
        assert new_res_gpl['license_type'] == dataset['resources'][1]['license_type']
    def test_holder(self):
        org = {'name': 'org-test',
               'title': 'Test org',
               'identifier': "abc"}
        
        pkg1 = {
            'id': '2b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset-1',
            'title': 'Dataset di test DCAT_AP-IT',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'modified':'2016-11-29',
            'identifier':'ISBNabc',
            'frequency':'UPDATE_CONT',
            'publisher_name':'bolzano',
            'publisher_identifier':'234234234',
            'creator_name':'test',
            'creator_identifier':'412946129',
            'holder_name':'bolzano',
            'holder_identifier':'234234234',
            'theme':'{ECON,ENVI}',
            'dataset_is_local': False,
            'language':'{DEU,ENG,ITA}',
        }
        
        pkg2 = {
            'id': 'eb6fe9ca-dc77-4cec-92a4-55c6624a5b00',
            'name': 'test-dataset-2',
            'title': 'Dataset di test DCAT_AP-IT 2',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'modified':'2016-11-29',
            'identifier':'ISBNcde',
            'frequency':'UPDATE_CONT',
            'publisher_name':'bolzano',
            'publisher_identifier':'234234234',
            'creator_name':'test',
            'creator_identifier':'412946129',
            'theme':'{ECON,ENVI}',
            'dataset_is_local': True,
            'language':'{DEU,ENG,ITA}',
            'owner_org': org['name'],
        }

        packages = [pkg1, pkg2]
        ctx = {'ignore_auth': True,
               'user': self._get_user()['name']}

        org_dict = helpers.call_action('organization_create', context=ctx, **org)
        for pkg in packages:
            helpers.call_action('package_create', context=ctx, **pkg)

        for pkg in packages:
            s = RDFSerializer()
            g = s.g
            dataset_ref = s.graph_from_dataset(pkg)
            has_identifier = False
            rights_holders = list(g.objects(dataset_ref, DCT.rightsHolder))

            assert len(rights_holders), "There should be one rights holder for\n {}:\n {}".format(pkg, 
                                                                                                  s.serialize_dataset(pkg))
            for holder_ref in rights_holders:
                _holder_names = list(g.objects(holder_ref, FOAF.name))
                _holder_ids = list((str(ob) for ob in g.objects(holder_ref, DCT.identifier)))
                
                # local dataset will use organization name only
                # while remote will have at least two names - one with lang, one default without lang
                if pkg['dataset_is_local']:
                    num_holder_names = 1
                else:
                    num_holder_names = 2
                assert len(_holder_names) == num_holder_names, _holder_names
                assert len(_holder_ids) == 1
                
                test_id = pkg.get('holder_identifier') or org_dict['identifier']
                has_identifier = _holder_ids[0] == test_id
                assert has_identifier, "No identifier in {} (expected {}) for\n {}\n{}".format(_holder_ids,
                                                                                           test_id,
                                                                                           pkg,
                                                                                           s.serialize_dataset(pkg))
示例#11
0
    def test_holder(self):
        org = {'name': 'org-test', 'title': 'Test org', 'identifier': 'abc'}

        pkg1 = {
            # 'id': '2b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset-1',
            'title': 'Dataset di test DCAT_AP-IT',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'modified': '2016-11-29',
            'identifier': str(uuid.uuid4()),
            'frequency': 'UPDATE_CONT',
            'publisher_name': 'bolzano',
            'publisher_identifier': '234234234',
            'creator_name': 'test',
            'creator_identifier': '789789789',
            'holder_name': 'bolzano',
            'holder_identifier': '234234234',
            FIELD_THEMES_AGGREGATE: themes_to_aggr_json(('ECON', )),
            'theme':
            json.dumps([theme_name_to_uri(name) for name in ('ECON', )]),
            'dataset_is_local': False,
            'language': '{DEU,ENG,ITA}',
        }

        pkg2 = {
            # 'id': 'eb6fe9ca-dc77-4cec-92a4-55c6624a5b00',
            'name': 'test-dataset-2',
            'title': 'Dataset di test DCAT_AP-IT 2',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'modified': '2016-11-29',
            'identifier': str(uuid.uuid4()),
            'frequency': 'UPDATE_CONT',
            'publisher_name': 'bolzano',
            'publisher_identifier': '234234234',
            'creator_name': 'test',
            'creator_identifier': '123123123123',
            FIELD_THEMES_AGGREGATE: themes_to_aggr_json(('ENVI', )),
            'theme':
            json.dumps([theme_name_to_uri(name) for name in ('ENVI', )]),
            'dataset_is_local': True,
            'language': '{DEU,ENG,ITA}',
            'owner_org': org['name'],
        }

        src_packages = [pkg1, pkg2]
        ctx = {'ignore_auth': True, 'user': self._get_user()['name']}

        org_loaded = Group.by_name(org['name'])
        if org_loaded:
            org_dict = org_loaded.__dict__
        else:
            org_dict = helpers.call_action('organization_create',
                                           context=ctx,
                                           **org)
        pkg1['owner_org'] = org_dict['id']
        pkg2['owner_org'] = org_dict['id']

        created_packages = [
            helpers.call_action('package_create', context=ctx, **pkg)
            for pkg in src_packages
        ]

        for pkg in created_packages:
            s = RDFSerializer()
            g = s.g
            dataset_ref = s.graph_from_dataset(pkg)
            has_identifier = False
            rights_holders = list(g.objects(dataset_ref, DCT.rightsHolder))

            assert len(rights_holders), 'There should be one rights holder for\n {}:\n {}'.\
                format(pkg, s.serialize_dataset(pkg))

            for holder_ref in rights_holders:
                _holder_names = list(g.objects(holder_ref, FOAF.name))
                _holder_ids = list(
                    (str(ob) for ob in g.objects(holder_ref, DCT.identifier)))

                # local dataset will use organization name only
                # while remote will have at least two names - one with lang, one default without lang
                if pkg['dataset_is_local']:
                    num_holder_names = 1
                else:
                    num_holder_names = 2
                assert len(_holder_names) == num_holder_names, _holder_names
                assert len(_holder_ids) == 1

                test_id = pkg.get(
                    'holder_identifier') or org_dict['identifier']
                has_identifier = _holder_ids[0] == test_id
                assert has_identifier, \
                    f'No identifier in {_holder_ids} (expected {test_id}) for\n {pkg}\n{s.serialize_dataset(pkg)}'
示例#12
0
    def test_subthemes(self):

        load_themes()

        subthemes = [{
            'theme':
            'AGRI',
            'subthemes': [
                'http://eurovoc.europa.eu/100253',
                'http://eurovoc.europa.eu/100258'
            ]
        }, {
            'theme': 'ENVI',
            'subthemes': []
        }]

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Dataset di test DCAT_AP-IT',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'tags': [{
                'name': 'Tag 1'
            }, {
                'name': 'Tag 2'
            }],
            'issued': '2016-11-29',
            'modified': '2016-11-29',
            'frequency': 'UPDATE_CONT',
            'publisher_name': 'bolzano',
            'publisher_identifier': '234234234',
            'creator_name': 'test',
            'creator_identifier': '412946129',
            'holder_name': 'bolzano',
            'holder_identifier': '234234234',
            'alternate_identifier': 'ISBN,TEST',
            FIELD_THEMES_AGGREGATE: json.dumps(subthemes),
            'theme': theme_aggr_to_theme_uris(
                subthemes
            )  # this is added dinamically when retrieving datasets from the db
        }

        s = RDFSerializer()
        p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])

        serialized = s.serialize_dataset(dataset)

        p.parse(serialized)
        datasets = list(p.datasets())

        assert len(datasets) == 1
        parsed_dataset = datasets[0]

        # test themes
        parsed_themes_raw = _get_extra_value(parsed_dataset.get('extras'),
                                             'theme')
        self.assertIsNotNone(
            parsed_themes_raw,
            f'Themes not found in parsed dataset {parsed_dataset}')
        parsed_themes = json.loads(parsed_themes_raw)
        self.assertEqual(2, len(parsed_themes))
        self.assertSetEqual(set(theme_names_to_uris(['AGRI', 'ENVI'])),
                            set(parsed_themes))

        # test aggregated themes
        parsed_aggr_raw = parsed_dataset.get(FIELD_THEMES_AGGREGATE, None)
        self.assertIsNotNone(
            parsed_aggr_raw,
            f'Aggregated themes not found in parsed dataset {parsed_dataset}')
        parsed_aggr = json.loads(parsed_aggr_raw)
        self.assertIsNotNone(parsed_aggr, 'Aggregate is None')
        self.assertEquals(2, len(parsed_aggr))
        for t in parsed_aggr:
            if t['theme'] == 'ENVI':
                self.assertSetEqual(set([]), set(t['subthemes']))
            elif t['theme'] == 'AGRI':
                self.assertSetEqual(set(subthemes[0]['subthemes']),
                                    set(t['subthemes']))
            else:
                self.fail(f'Unknown theme: {t}')