def test_distribution_format_with_backslash(self):

        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'url': 'http://example.com/data/file.csv',
            'format': 'text/csv',
        }

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [
                resource
            ]
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        distribution = self._triple(g, dataset_ref, DCAT.distribution, None)[2]

        assert self._triple(g, distribution, DCAT.mediaType, resource['format'])
    def test_spatial(self):
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'extras': [
                {'key': 'spatial_uri', 'value': 'http://sws.geonames.org/6361390/'},
                {'key': 'spatial_text', 'value': 'Tarragona'},
                {'key': 'spatial', 'value': '{"type": "Polygon", "coordinates": [[[1.1870606,41.0786393],[1.1870606,41.1655218],[1.3752339,41.1655218],[1.3752339,41.0786393],[1.1870606,41.0786393]]]}'},

            ]
        }
        extras = self._extras(dataset)

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        spatial = self._triple(g, dataset_ref, DCT.spatial, None)[2]
        assert spatial
        eq_(unicode(spatial), extras['spatial_uri'])
        assert self._triple(g, spatial, RDF.type, DCT.Location)
        assert self._triple(g, spatial, SKOS.prefLabel, extras['spatial_text'])

        eq_(len([t for t in g.triples((spatial, LOCN.geometry, None))]), 2)
        # Geometry in GeoJSON
        assert self._triple(g, spatial, LOCN.geometry, extras['spatial'], GEOJSON_IMT)

        # Geometry in WKT
        wkt_geom = wkt.dumps(json.loads(extras['spatial']), decimals=4)
        assert self._triple(g, spatial, LOCN.geometry, wkt_geom, GSP.wktLiteral)
    def test_spatial_bad_json_no_wkt(self):
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'extras': [
                {
                    'key': 'spatial',
                    'value': 'NotJSON'
                },
            ]
        }
        extras = self._extras(dataset)

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        spatial = self._triple(g, dataset_ref, DCT.spatial, None)[2]
        assert spatial
        assert isinstance(spatial, BNode)
        # Geometry in GeoJSON
        assert self._triple(g, spatial, LOCN.geometry, extras['spatial'],
                            GEOJSON_IMT)

        # Geometry in WKT
        assert len([t for t in g.triples((spatial, LOCN.geometry, None))]) == 1
    def test_distribution_format_with_backslash(self):

        resource = {
            "id": "c041c635-054f-4431-b647-f9186926d021",
            "package_id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "CSV file",
            "url": "http://example.com/data/file.csv",
            "format": "text/csv",
        }

        dataset = {
            "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "test-dataset",
            "title": "Test DCAT dataset",
            "resources": [resource],
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        distribution = self._triple(g, dataset_ref, DCAT.distribution, None)[2]

        assert self._triple(g, distribution, DCAT.mediaType, resource["format"])
    def test_contact_details_extras(self):
        dataset = {
            "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "test-dataset",
            "maintainer": "Example Maintainer",
            "maintainer_email": "*****@*****.**",
            "author": "Example Author",
            "author_email": "*****@*****.**",
            "extras": [
                {"key": "contact_uri", "value": "http://example.com/contact"},
                {"key": "contact_name", "value": "Example Contact"},
                {"key": "contact_email", "value": "*****@*****.**"},
            ],
        }
        extras = self._extras(dataset)

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        # Contact details

        contact_details = self._triple(g, dataset_ref, DCAT.contactPoint, None)[2]
        assert contact_details
        eq_(unicode(contact_details), extras["contact_uri"])
        assert self._triple(g, contact_details, VCARD.fn, extras["contact_name"])
        assert self._triple(g, contact_details, VCARD.hasEmail, extras["contact_email"])
    def test_hash_algorithm_not_uri(self):

        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'hash': 'aaaa',
            'hash_algorithm': 'sha1',
        }

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource]
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        distribution = self._triple(g, dataset_ref, DCAT.distribution, None)[2]

        checksum = self._triple(g, distribution, SPDX.checksum, None)[2]
        assert checksum
        assert self._triple(g, checksum, RDF.type, SPDX.Checksum)
        assert self._triple(
            g,
            checksum,
            SPDX.checksumValue,
            resource['hash'],
            data_type='http://www.w3.org/2001/XMLSchema#hexBinary')
        assert self._triple(g, checksum, SPDX.algorithm,
                            resource['hash_algorithm'])
    def test_distribution_size_not_number(self):

        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'size': 'aaaa',
        }

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [
                resource
            ]
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        distribution = self._triple(g, dataset_ref, DCAT.distribution, None)[2]

        assert self._triple(g, distribution, DCAT.byteSize, resource['size'])
    def test_publisher_extras(self):
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'organization': {
                'id': '',
                'name': 'publisher1',
                'title': 'Example Publisher from Org',
            },
            'extras': [
                {'key': 'publisher_uri', 'value': 'http://example.com/publisher'},
                {'key': 'publisher_name', 'value': 'Example Publisher'},
                {'key': 'publisher_email', 'value': '*****@*****.**'},
                {'key': 'publisher_url', 'value': 'http://example.com/publisher/home'},
                {'key': 'publisher_type', 'value': 'http://purl.org/adms/publishertype/Company'},
            ]


        }
        extras = self._extras(dataset)

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        publisher = self._triple(g, dataset_ref, DCT.publisher, None)[2]
        assert publisher
        eq_(unicode(publisher), extras['publisher_uri'])

        assert self._triple(g, publisher, RDF.type, FOAF.Organization)
        assert self._triple(g, publisher, FOAF.name, extras['publisher_name'])
        assert self._triple(g, publisher, FOAF.mbox, extras['publisher_email'])
        assert self._triple(g, publisher, FOAF.homepage, URIRef(extras['publisher_url']))
        assert self._triple(g, publisher, DCT.type, extras['publisher_type'])
    def test_distribution_both_urls_different(self):

        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'url': 'http://example.com/data/file',
            'download_url': 'http://example.com/data/file.csv',
        }

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [
                resource
            ]
        }

        s = RDFSerializer(profiles=['schemaorg'])
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        distribution = self._triple(g, dataset_ref, SCHEMA.distribution, None)[2]

        assert self._triple(g, distribution, SCHEMA.url, resource['url'])
        assert self._triple(g, distribution, SCHEMA.contentUrl, resource['download_url'])
    def test_publisher_extras(self):
        dataset = {
            "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "test-dataset",
            "organization": {"id": "", "name": "publisher1", "title": "Example Publisher from Org"},
            "extras": [
                {"key": "publisher_uri", "value": "http://example.com/publisher"},
                {"key": "publisher_name", "value": "Example Publisher"},
                {"key": "publisher_email", "value": "*****@*****.**"},
                {"key": "publisher_url", "value": "http://example.com/publisher/home"},
                {"key": "publisher_type", "value": "http://purl.org/adms/publishertype/Company"},
            ],
        }
        extras = self._extras(dataset)

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        publisher = self._triple(g, dataset_ref, DCT.publisher, None)[2]
        assert publisher
        eq_(unicode(publisher), extras["publisher_uri"])

        assert self._triple(g, publisher, RDF.type, FOAF.Organization)
        assert self._triple(g, publisher, FOAF.name, extras["publisher_name"])
        assert self._triple(g, publisher, FOAF.mbox, extras["publisher_email"])
        assert self._triple(g, publisher, FOAF.homepage, URIRef(extras["publisher_url"]))
        assert self._triple(g, publisher, DCT.type, extras["publisher_type"])
示例#11
0
    def test_distribution_both_urls_the_same(self):

        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'url': 'http://example.com/data/file.csv',
            'download_url': 'http://example.com/data/file.csv',
        }

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource]
        }

        s = RDFSerializer(profiles=['schemaorg'])
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        distribution = self._triple(g, dataset_ref, SCHEMA.distribution,
                                    None)[2]

        assert self._triple(g, distribution, SCHEMA.contentUrl,
                            resource['url'])
        assert self._triple(g, distribution, SCHEMA.url, None) is None
    def test_temporal(self):
        dataset = {
            'id':
            '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name':
            'test-dataset',
            'extras': [
                {
                    'key': 'temporal_start',
                    'value': '2015-06-26T15:21:09.075774'
                },
                {
                    'key': 'temporal_end',
                    'value': '2015-07-14'
                },
            ]
        }
        extras = self._extras(dataset)

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        temporal = self._triple(g, dataset_ref, DCT.temporal, None)[2]
        assert temporal

        assert self._triple(g, temporal, RDF.type, DCT.PeriodOfTime)
        assert self._triple(g, temporal, SCHEMA.startDate,
                            parse_date(extras['temporal_start']).isoformat(),
                            XSD.dateTime)
        assert self._triple(g, temporal, SCHEMA.endDate,
                            parse_date(extras['temporal_end']).isoformat(),
                            XSD.dateTime)
示例#13
0
    def test_publisher_extras(self):
        dataset = {
            'id':
            '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name':
            'test-dataset',
            'organization': {
                'id': '',
                'name': 'publisher1',
                'title': 'Example Publisher from Org',
            },
            'extras': [
                {
                    'key': 'publisher_uri',
                    'value': 'http://example.com/publisher'
                },
                {
                    'key': 'publisher_name',
                    'value': 'Example Publisher'
                },
                {
                    'key': 'publisher_email',
                    'value': '*****@*****.**'
                },
                {
                    'key': 'publisher_url',
                    'value': 'http://example.com/publisher/home'
                },
                {
                    'key': 'publisher_type',
                    'value': 'http://purl.org/adms/publishertype/Company'
                },
            ]
        }
        extras = self._extras(dataset)

        s = RDFSerializer(profiles=['schemaorg'])
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        publisher = self._triple(g, dataset_ref, SCHEMA.publisher, None)[2]
        assert publisher
        eq_(unicode(publisher), extras['publisher_uri'])
        assert self._triple(g, publisher, RDF.type, SCHEMA.Organization)
        assert self._triple(g, publisher, SCHEMA.name,
                            extras['publisher_name'])

        contact_point = self._triple(g, publisher, SCHEMA.contactPoint,
                                     None)[2]
        assert contact_point
        assert self._triple(g, contact_point, RDF.type, SCHEMA.ContactPoint)
        assert self._triple(g, contact_point, SCHEMA.name,
                            extras['publisher_name'])
        assert self._triple(g, contact_point, SCHEMA.email,
                            extras['publisher_email'])
        assert self._triple(g, contact_point, SCHEMA.url,
                            extras['publisher_url'])
        assert self._triple(g, contact_point, SCHEMA.contactType,
                            'customer service')
示例#14
0
    def test_temporal_start_and_end(self):
        dataset = {
            'id':
            '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name':
            'test-dataset',
            'extras': [
                {
                    'key': 'temporal_start',
                    'value': '2015-06-26T15:21:09.075774'
                },
                {
                    'key': 'temporal_end',
                    'value': '2015-07-14'
                },
            ]
        }
        extras = self._extras(dataset)

        s = RDFSerializer(profiles=['schemaorg'])
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        assert self._triple(g, dataset_ref, SCHEMA.temporalCoverage,
                            '2015-06-26T15:21:09.075774/2015-07-14')
    def test_graph_from_dataset(self):

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'notes': 'Lorem ipsum',
            'url': 'http://example.com/ds1',
            'version': '1.0b',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
            'extras': [
                {'key': 'alternate_identifier', 'value': 'xyz'},
                {'key': 'version_notes', 'value': 'This is a beta version'},
                {'key': 'frequency', 'value': 'monthly'},
                {'key': 'language', 'value': '[\"en\"]'},
                {'key': 'theme', 'value': '[\"http://eurovoc.europa.eu/100142\", \"http://eurovoc.europa.eu/100152\"]'},
                {'key': 'conforms_to', 'value': '[\"Standard 1\", \"Standard 2\"]'},

            ]
        }
        extras = self._extras(dataset)

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        eq_(unicode(dataset_ref), utils.dataset_uri(dataset))

        # Basic fields
        assert self._triple(g, dataset_ref, RDF.type, DCAT.Dataset)
        assert self._triple(g, dataset_ref, DCT.title, dataset['title'])
        assert self._triple(g, dataset_ref, DCT.description, dataset['notes'])

        assert self._triple(g, dataset_ref, OWL.versionInfo, dataset['version'])
        assert self._triple(g, dataset_ref, ADMS.versionNotes, extras['version_notes'])
        assert self._triple(g, dataset_ref, ADMS.identifier, extras['alternate_identifier'])
        assert self._triple(g, dataset_ref, DCT.accrualPeriodicity, extras['frequency'])

        # Tags
        eq_(len([t for t in g.triples((dataset_ref, DCAT.keyword, None))]), 2)
        for tag in dataset['tags']:
            assert self._triple(g, dataset_ref, DCAT.keyword, tag['name'])

        # Dates
        assert self._triple(g, dataset_ref, DCT.issued, dataset['metadata_created'], XSD.dateTime)
        assert self._triple(g, dataset_ref, DCT.modified, dataset['metadata_modified'], XSD.dateTime)

        # List
        for item in [
            ('language', DCT.language),
            ('theme', DCAT.theme),
            ('conforms_to', DCAT.conformsTo),
        ]:
            values = json.loads(extras[item[0]])
            eq_(len([t for t in g.triples((dataset_ref, item[1], None))]), len(values))
            for value in values:
                assert self._triple(g, dataset_ref, item[1], value)
    def test_distribution_both_urls_the_same(self):

        resource = {
            "id": "c041c635-054f-4431-b647-f9186926d021",
            "package_id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "CSV file",
            "url": "http://example.com/data/file.csv",
            "download_url": "http://example.com/data/file.csv",
        }

        dataset = {
            "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "test-dataset",
            "title": "Test DCAT dataset",
            "resources": [resource],
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        distribution = self._triple(g, dataset_ref, DCAT.distribution, None)[2]

        assert self._triple(g, distribution, DCAT.downloadURL, URIRef(resource["url"]))
        assert self._triple(g, distribution, DCAT.accessURL, None) is None
    def test_distribution_format(self):

        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'url': 'http://example.com/data/file.csv',
            'format': 'CSV',
            'mimetype': 'text/csv',
        }

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [
                resource
            ]
        }

        s = RDFSerializer(profiles=['schemaorg'])
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        distribution = self._triple(g, dataset_ref, SCHEMA.distribution, None)[2]

        assert self._triple(g, distribution, SCHEMA.encodingFormat, resource['format'])
        assert self._triple(g, distribution, SCHEMA.fileType, resource['mimetype'])
    def test_identifier_extra(self):
        dataset = {
            'id':
            '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name':
            'test-dataset',
            'extras': [
                {
                    'key': 'identifier',
                    'value': 'idxxx'
                },
                {
                    'key': 'guid',
                    'value': 'guidyyy'
                },
            ]
        }
        extras = self._extras(dataset)

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        assert self._triple(g, dataset_ref, DCT.identifier,
                            extras['identifier'])
示例#19
0
    def test_distribution_format_with_mimetype_fallback(self):

        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'url': 'http://example.com/data/file.csv',
            'format': '',
            'mimetype': 'text/csv',
        }

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource]
        }

        s = RDFSerializer(profiles=['schemaorg'])
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        distribution = self._triple(g, dataset_ref, SCHEMA.distribution,
                                    None)[2]

        assert self._triple(g, distribution, SCHEMA.encodingFormat,
                            resource['mimetype'])
    def _build_graph_and_check_format_mediatype(self, dataset_dict,
                                                expected_format,
                                                expected_mediatype):
        """
        Creates a graph based on the given dict and checks for dct:format and dct:mediaType in the
        first resource element.

        :param dataset_dict:
            dataset dict, expected to contain one resource
        :param expected_format:
            expected list of dct:format items in the resource
        :param expected_mediatype:
            expected list of dcat:mediaType items in the resource
        """
        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset_dict)

        # graph should contain the expected nodes
        resource_ref = list(g.objects(dataset_ref, DCAT.distribution))[0]
        dct_format = list(g.objects(resource_ref, DCT['format']))
        dcat_mediatype = list(g.objects(resource_ref, DCAT.mediaType))
        assert expected_format == dct_format
        assert expected_mediatype == dcat_mediatype
    def test_contact_details_extras(self):
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'maintainer': 'Example Maintainer',
            'maintainer_email': '*****@*****.**',
            'author': 'Example Author',
            'author_email': '*****@*****.**',
            'extras': [
                {'key': 'contact_uri', 'value': 'http://example.com/contact'},
                {'key': 'contact_name', 'value': 'Example Contact'},
                {'key': 'contact_email', 'value': '*****@*****.**'},

            ]


        }
        extras = self._extras(dataset)

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        # Contact details

        contact_details = self._triple(g, dataset_ref, DCAT.contactPoint, None)[2]
        assert contact_details
        eq_(unicode(contact_details), extras['contact_uri'])
        assert self._triple(g, contact_details, VCARD.fn, extras['contact_name'])
        assert self._triple(g, contact_details, VCARD.hasEmail, extras['contact_email'])
    def test_hash_algorithm_not_uri(self):

        resource = {
            "id": "c041c635-054f-4431-b647-f9186926d021",
            "package_id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "CSV file",
            "hash": "aaaa",
            "hash_algorithm": "sha1",
        }

        dataset = {
            "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "test-dataset",
            "title": "Test DCAT dataset",
            "resources": [resource],
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        distribution = self._triple(g, dataset_ref, DCAT.distribution, None)[2]

        checksum = self._triple(g, distribution, SPDX.checksum, None)[2]
        assert checksum
        assert self._triple(
            g, checksum, SPDX.checksumValue, resource["hash"], data_type="http://www.w3.org/2001/XMLSchema#hexBinary"
        )
        assert self._triple(g, checksum, SPDX.algorithm, resource["hash_algorithm"])
    def test_publisher_extras(self):
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'organization': {
                'id': '',
                'name': 'publisher1',
                'title': 'Example Publisher from Org',
            },
            'extras': [
                {'key': 'publisher_uri', 'value': 'http://example.com/publisher'},
                {'key': 'publisher_name', 'value': 'Example Publisher'},
                {'key': 'publisher_email', 'value': '*****@*****.**'},
                {'key': 'publisher_url', 'value': 'http://example.com/publisher/home'},
                {'key': 'publisher_type', 'value': 'http://purl.org/adms/publishertype/Company'},
            ]


        }
        extras = self._extras(dataset)

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        publisher = self._triple(g, dataset_ref, DCT.publisher, None)[2]
        assert publisher
        eq_(unicode(publisher), extras['publisher_uri'])

        assert self._triple(g, publisher, RDF.type, FOAF.Organization)
        assert self._triple(g, publisher, FOAF.name, extras['publisher_name'])
        assert self._triple(g, publisher, FOAF.mbox, extras['publisher_email'])
        assert self._triple(g, publisher, FOAF.homepage, URIRef(extras['publisher_url']))
        assert self._triple(g, publisher, DCT.type, extras['publisher_type'])
    def test_distribution_both_urls_the_same(self):

        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'url': 'http://example.com/data/file.csv',
            'download_url': 'http://example.com/data/file.csv',
        }

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [
                resource
            ]
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        distribution = self._triple(g, dataset_ref, DCAT.distribution, None)[2]

        assert self._triple(g, distribution, DCAT.downloadURL, URIRef(resource['url']))
        assert self._triple(g, distribution, DCAT.accessURL, None) is None
    def test_spatial(self):
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'extras': [
                {'key': 'spatial_uri', 'value': 'http://sws.geonames.org/6361390/'},
                {'key': 'spatial_text', 'value': 'Tarragona'},
                {'key': 'spatial', 'value': '{"type": "Polygon", "coordinates": [[[1.1870606,41.0786393],[1.1870606,41.1655218],[1.3752339,41.1655218],[1.3752339,41.0786393],[1.1870606,41.0786393]]]}'},

            ]
        }
        extras = self._extras(dataset)

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        spatial = self._triple(g, dataset_ref, DCT.spatial, None)[2]
        assert spatial
        eq_(unicode(spatial), extras['spatial_uri'])
        assert self._triple(g, spatial, RDF.type, DCT.Location)
        assert self._triple(g, spatial, SKOS.prefLabel, extras['spatial_text'])

        eq_(len([t for t in g.triples((spatial, LOCN.geometry, None))]), 2)
        # Geometry in GeoJSON
        assert self._triple(g, spatial, LOCN.geometry, extras['spatial'], GEOJSON_IMT)

        # Geometry in WKT
        wkt_geom = wkt.dumps(json.loads(extras['spatial']), decimals=4)
        assert self._triple(g, spatial, LOCN.geometry, wkt_geom, GSP.wktLiteral)
    def test_hash_algorithm_not_uri(self):

        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'hash': 'aaaa',
            'hash_algorithm': 'sha1',
        }

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [
                resource
            ]
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        distribution = self._triple(g, dataset_ref, DCAT.distribution, None)[2]

        checksum = self._triple(g, distribution, SPDX.checksum, None)[2]
        assert checksum
        assert self._triple(g, checksum, SPDX.checksumValue, resource['hash'], data_type='http://www.w3.org/2001/XMLSchema#hexBinary')
        assert self._triple(g, checksum, SPDX.algorithm, resource['hash_algorithm'])
    def test_distribution_size_not_number(self):

        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'size': 'aaaa',
        }

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [
                resource
            ]
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        distribution = self._triple(g, dataset_ref, DCAT.distribution, None)[2]

        assert self._triple(g, distribution, DCAT.byteSize, resource['size'])
    def test_spatial(self):
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'extras': [
                {'key': 'spatial_uri', 'value': 'http://sws.geonames.org/6361390/'},
                {'key': 'spatial_text', 'value': 'Tarragona'},
                {'key': 'spatial', 'value': '{"type": "Polygon", "coordinates": [[[1.1870606,41.0786393],[1.1870606,41.1655218],[1.3752339,41.1655218],[1.3752339,41.0786393],[1.1870606,41.0786393]]]}'},

            ]
        }
        extras = self._extras(dataset)

        s = RDFSerializer(profiles=['schemaorg'])
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        spatial = self._triple(g, dataset_ref, SCHEMA.spatialCoverage, None)[2]
        assert spatial
        eq_(unicode(spatial), extras['spatial_uri'])
        assert self._triple(g, spatial, RDF.type, SCHEMA.Place)
        assert self._triple(g, spatial, SCHEMA.description, extras['spatial_text'])
        geo = self._triple(g, spatial, SCHEMA.geo, None)[2]
        assert self._triple(g, geo, RDF.type, SCHEMA.GeoShape)
        assert self._triple(g, geo, SCHEMA.polygon, extras['spatial'])
    def test_distribution_format_with_backslash(self):

        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'url': 'http://example.com/data/file.csv',
            'format': 'text/csv',
        }

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [
                resource
            ]
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        distribution = self._triple(g, dataset_ref, DCAT.distribution, None)[2]

        assert self._triple(g, distribution, DCAT.mediaType, resource['format'])
    def test_contact_details_extras(self):
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'maintainer': 'Example Maintainer',
            'maintainer_email': '*****@*****.**',
            'author': 'Example Author',
            'author_email': '*****@*****.**',
            'extras': [
                {'key': 'contact_uri', 'value': 'http://example.com/contact'},
                {'key': 'contact_name', 'value': 'Example Contact'},
                {'key': 'contact_email', 'value': '*****@*****.**'},

            ]


        }
        extras = self._extras(dataset)

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        # Contact details

        contact_details = self._triple(g, dataset_ref, DCAT.contactPoint, None)[2]
        assert contact_details
        eq_(unicode(contact_details), extras['contact_uri'])
        assert self._triple(g, contact_details, VCARD.fn, extras['contact_name'])
        assert self._triple(g, contact_details, VCARD.hasEmail, extras['contact_email'])
    def test_distribution_both_urls_different_with_access_url(self):

        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'access_url': 'http://example.com/data/file',
            'download_url': 'http://example.com/data/file.csv',
        }

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource]
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        distribution = self._triple(g, dataset_ref, DCAT.distribution, None)[2]

        assert self._triple(g, distribution, DCAT.accessURL,
                            URIRef(resource['access_url']))
        assert self._triple(g, distribution, DCAT.downloadURL,
                            URIRef(resource['download_url']))
    def test_distribution_fields(self):
        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'description': 'A CSV file',
            'url': 'http://example.com/data/file.csv',
            'status': 'http://purl.org/adms/status/Completed',
            'rights': 'Some statement about rights',
            'license': 'http://creativecommons.org/licenses/by/3.0/',
            'issued': '2015-06-26T15:21:09.034694',
            'modified': '2015-06-26T15:21:09.075774',
            'size': 1234,
            'language': '[\"en\", \"es\", \"ca\"]',
        }

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [
                resource
            ]
        }

        s = RDFSerializer(profiles=['schemaorg'])
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        eq_(len([t for t in g.triples((dataset_ref, SCHEMA.distribution, None))]), 1)

        # URI
        distribution = self._triple(g, dataset_ref, SCHEMA.distribution, None)[2]
        eq_(unicode(distribution), utils.resource_uri(resource))

        # Basic fields
        assert self._triple(g, distribution, RDF.type, SCHEMA.DataDownload)
        assert self._triple(g, distribution, SCHEMA.name, resource['name'])
        assert self._triple(g, distribution, SCHEMA.description, resource['description'])
        assert self._triple(g, distribution, SCHEMA.license, resource['license'])

        # List
        for item in [
            ('language', SCHEMA.inLanguage),
        ]:
            values = json.loads(resource[item[0]])
            eq_(len([t for t in g.triples((distribution, item[1], None))]), len(values))
            for value in values:
                assert self._triple(g, distribution, item[1], value)

        # Dates
        assert self._triple(g, distribution, SCHEMA.datePublished, resource['issued'])
        assert self._triple(g, distribution, SCHEMA.dateModified, resource['modified'])

        # Numbers
        assert self._triple(g, distribution, SCHEMA.contentSize, resource['size'])
    def test_distribution_fields(self):
        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'description': 'A CSV file',
            'url': 'http://example.com/data/file.csv',
            'status': 'http://purl.org/adms/status/Completed',
            'rights': 'Some statement about rights',
            'license': 'http://creativecommons.org/licenses/by/3.0/',
            'issued': '2015-06-26T15:21:09.034694',
            'modified': '2015-06-26T15:21:09.075774',
            'size': 1234,
            'language': '[\"en\", \"es\", \"ca\"]',
        }

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [
                resource
            ]
        }

        s = RDFSerializer(profiles=['schemaorg'])
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        eq_(len([t for t in g.triples((dataset_ref, SCHEMA.distribution, None))]), 1)

        # URI
        distribution = self._triple(g, dataset_ref, SCHEMA.distribution, None)[2]
        eq_(unicode(distribution), utils.resource_uri(resource))

        # Basic fields
        assert self._triple(g, distribution, RDF.type, SCHEMA.DataDownload)
        assert self._triple(g, distribution, SCHEMA.name, resource['name'])
        assert self._triple(g, distribution, SCHEMA.description, resource['description'])
        assert self._triple(g, distribution, SCHEMA.license, resource['license'])

        # List
        for item in [
            ('language', SCHEMA.inLanguage),
        ]:
            values = json.loads(resource[item[0]])
            eq_(len([t for t in g.triples((distribution, item[1], None))]), len(values))
            for value in values:
                assert self._triple(g, distribution, item[1], value)

        # Dates
        assert self._triple(g, distribution, SCHEMA.datePublished, resource['issued'])
        assert self._triple(g, distribution, SCHEMA.dateModified, resource['modified'])

        # Numbers
        assert self._triple(g, distribution, SCHEMA.contentSize, resource['size'])
    def test_identifier_id(self):
        dataset = {"id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6", "name": "test-dataset"}

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        assert self._triple(g, dataset_ref, DCT.identifier, dataset["id"])
示例#35
0
    def test_distribution_fields(self):

        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'description': 'A CSV file',
            'url': 'http://example.com/data/file.csv',
            'status': 'http://purl.org/adms/status/Completed',
            'rights': 'Some statement about rights',
            'license': 'http://creativecommons.org/licenses/by/3.0/',
            'issued': '2015-06-26T15:21:09.034694',
            'modified': '2015-06-26T15:21:09.075774',
            'size': 1234,
        }

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [resource]
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        eq_(
            len([t
                 for t in g.triples((dataset_ref, DCAT.distribution, None))]),
            1)

        # URI
        distribution = self._triple(g, dataset_ref, DCAT.distribution, None)[2]
        eq_(unicode(distribution), utils.resource_uri(resource))

        # Basic fields
        assert self._triple(g, distribution, RDF.type, DCAT.Distribution)
        assert self._triple(g, distribution, DCT.title, resource['name'])
        assert self._triple(g, distribution, DCT.description,
                            resource['description'])
        assert self._triple(g, distribution, DCT.rights, resource['rights'])
        assert self._triple(g, distribution, DCT.license, resource['license'])
        assert self._triple(g, distribution, ADMS.status, resource['status'])

        # Dates
        assert self._triple(g, distribution, DCT.issued, resource['issued'],
                            XSD.dateTime)
        assert self._triple(g, distribution, DCT.modified,
                            resource['modified'], XSD.dateTime)

        # Numbers
        assert self._triple(g, distribution, DCAT.byteSize,
                            float(resource['size']), XSD.decimal)
    def test_graph_from_dataset(self):

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Dataset di test DCAT_AP-IT',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'tags': [{
                'name': 'Tag 1'
            }, {
                'name': 'Tag 2'
            }],
            'issued': '2016-11-29',
            'modified': '2016-11-29',
            'identifier': 'ISBN',
            'temporal_start': '2016-11-01',
            'temporal_end': '2016-11-30',
            'frequency': 'UPDATE_CONT',
            'publisher_name': 'bolzano',
            'publisher_identifier': '234234234',
            'creator_name': 'test',
            'creator_identifier': '412946129',
            'holder_name': 'bolzano',
            'holder_identifier': '234234234',
            'alternate_identifier': 'ISBN,TEST',
            'theme': '{ECON,ENVI}',
            'geographical_geonames_url': 'http://www.geonames.org/3181913',
            'language': '{DEU,ENG,ITA}',
            'is_version_of':
            'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
            'conforms_to': '{CONF1,CONF2,CONF3}'
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        eq_(unicode(dataset_ref), utils.dataset_uri(dataset))

        # Basic fields
        assert self._triple(g, dataset_ref, RDF.type, DCATAPIT.Dataset)
        assert self._triple(g, dataset_ref, DCT.title, dataset['title'])
        assert self._triple(g, dataset_ref, DCT.description, dataset['notes'])

        assert self._triple(g, dataset_ref, DCT.identifier,
                            dataset['identifier'])

        # Tags
        eq_(len([t for t in g.triples((dataset_ref, DCAT.keyword, None))]), 2)
        for tag in dataset['tags']:
            assert self._triple(g, dataset_ref, DCAT.keyword, tag['name'])
示例#37
0
    def test_identifier_id(self):
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        assert self._triple(g, dataset_ref, DCT.identifier, dataset['id'])
    def test_identifier_id(self):
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        assert self._triple(g, dataset_ref, DCT.identifier, dataset['id'])
    def test_distribution_fields(self):

        resource = {
            'id': 'c041c635-054f-4431-b647-f9186926d021',
            'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'CSV file',
            'description': 'A CSV file',
            'url': 'http://example.com/data/file.csv',
            'status': 'http://purl.org/adms/status/Completed',
            'rights': 'Some statement about rights',
            'license': 'http://creativecommons.org/licenses/by/3.0/',
            'issued': '2015-06-26T15:21:09.034694',
            'modified': '2015-06-26T15:21:09.075774',
            'size': 1234,
        }

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'resources': [
                resource
            ]
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        eq_(len([t for t in g.triples((dataset_ref, DCAT.distribution, None))]), 1)

        # URI
        distribution = self._triple(g, dataset_ref, DCAT.distribution, None)[2]
        eq_(unicode(distribution), utils.resource_uri(resource))

        # Basic fields
        assert self._triple(g, distribution, RDF.type, DCAT.Distribution)
        assert self._triple(g, distribution, DCT.title, resource['name'])
        assert self._triple(g, distribution, DCT.description, resource['description'])
        assert self._triple(g, distribution, DCT.rights, resource['rights'])
        assert self._triple(g, distribution, DCT.license, resource['license'])
        assert self._triple(g, distribution, ADMS.status, resource['status'])

        # Dates
        assert self._triple(g, distribution, DCT.issued, resource['issued'], XSD.dateTime)
        assert self._triple(g, distribution, DCT.modified, resource['modified'], XSD.dateTime)

        # Numbers
        assert self._triple(g, distribution, DCAT.byteSize, float(resource['size']), XSD.decimal)
    def test_identifier_guid(self):
        dataset = {
            "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "test-dataset",
            "extras": [{"key": "guid", "value": "guidyyy"}],
        }
        extras = self._extras(dataset)

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        assert self._triple(g, dataset_ref, DCT.identifier, extras["guid"])
示例#41
0
    def test_alternate_identifier_numeric(self):
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'extras': [
                {'key': 'alternate_identifier', 'value': '1.0'},
            ]
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        assert self._triple(g, dataset_ref, DCT.identifier, dataset['id'])
    def test_catalog(self):
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
        }
        s = RDFSerializer(profiles=['schemaorg'])
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)
        data_catalog = self._triple(g, dataset_ref, SCHEMA.includedInDataCatalog, None)[2]
        assert data_catalog
        assert self._triple(g, data_catalog, RDF.type, SCHEMA.DataCatalog)
        assert self._triple(g, data_catalog, SCHEMA.url, 'http://ckan.example.org')
        assert self._triple(g, data_catalog, SCHEMA.name, 'ckan.example.org')
        assert self._triple(g, data_catalog, SCHEMA.description, 'CKAN Portal')
    def test_alternate_identifier_numeric(self):
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'extras': [
                {'key': 'alternate_identifier', 'value': '1.0'},
            ]
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        assert self._triple(g, dataset_ref, DCT.identifier, dataset['id'])
    def test_temporal_start_only(self):
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'extras': [
                {'key': 'temporal_start', 'value': '2015-06-26T15:21:09.075774'},
            ]
        }
        extras = self._extras(dataset)

        s = RDFSerializer(profiles=['schemaorg'])
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        assert self._triple(g, dataset_ref, SCHEMA.temporalCoverage, parse_date(extras['temporal_start']).isoformat())
    def test_identifier_guid(self):
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'extras': [
                {'key': 'guid', 'value': 'guidyyy'},
            ]
        }
        extras = self._extras(dataset)

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        assert self._triple(g, dataset_ref, DCT.identifier, extras['guid'])
    def test_publisher_org(self):
        dataset = {
            "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "test-dataset",
            "organization": {"id": "", "name": "publisher1", "title": "Example Publisher from Org"},
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        publisher = self._triple(g, dataset_ref, DCT.publisher, None)[2]
        assert publisher

        assert self._triple(g, publisher, RDF.type, FOAF.Organization)
        assert self._triple(g, publisher, FOAF.name, dataset["organization"]["title"])
示例#47
0
    def test_distributions(self):

        dataset = {
            'id':
            '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name':
            'test-dataset',
            'title':
            'Test DCAT dataset',
            'resources': [
                {
                    'id': 'c041c635-054f-4431-b647-f9186926d021',
                    'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
                    'name': 'CSV file'
                },
                {
                    'id': '8bceeda9-0084-477f-aa33-dad6148900d5',
                    'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
                    'name': 'XLS file'
                },
                {
                    'id': 'da73d939-0f11-45a1-9733-5de108383133',
                    'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
                    'name': 'PDF file'
                },
            ]
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        eq_(
            len([t
                 for t in g.triples((dataset_ref, DCAT.distribution, None))]),
            3)

        for resource in dataset['resources']:
            distribution = self._triple(g, dataset_ref, DCAT.distribution,
                                        URIRef(
                                            utils.resource_uri(resource)))[2]

            assert self._triple(g, distribution, RDF.type, DCAT.Distribution)
            assert self._triple(g, distribution, DCT.title, resource['name'])
示例#48
0
    def test_contact_details_author(self):
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'author': 'Example Author',
            'author_email': '*****@*****.**',
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        contact_details = self._triple(g, dataset_ref, DCAT.contactPoint, None)[2]
        assert contact_details
        assert_true(isinstance(contact_details, BNode))
        assert self._triple(g, contact_details, VCARD.fn, dataset['author'])
        assert self._triple(g, contact_details, VCARD.hasEmail, dataset['author_email'])
    def test_contact_details_author(self):
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'author': 'Example Author',
            'author_email': '*****@*****.**',
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        contact_details = self._triple(g, dataset_ref, DCAT.contactPoint, None)[2]
        assert contact_details
        assert_true(isinstance(contact_details, BNode))
        assert self._triple(g, contact_details, VCARD.fn, dataset['author'])
        assert self._triple(g, contact_details, VCARD.hasEmail, dataset['author_email'])
    def test_contact_details_author(self):
        dataset = {
            "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "test-dataset",
            "author": "Example Author",
            "author_email": "*****@*****.**",
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        contact_details = self._triple(g, dataset_ref, DCAT.contactPoint, None)[2]
        assert contact_details
        assert_true(isinstance(contact_details, BNode))
        assert self._triple(g, contact_details, VCARD.fn, dataset["author"])
        assert self._triple(g, contact_details, VCARD.hasEmail, dataset["author_email"])
    def test_contact_details_no_duplicate_mailto(self):
        # tests that mailto: isn't added again if it is stored in the dataset
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'author': 'Example Author',
            'author_email': 'mailto:[email protected]',
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        contact_details = self._triple(g, dataset_ref, DCAT.contactPoint, None)[2]
        assert contact_details
        assert_true(isinstance(contact_details, BNode))
        assert self._triple(g, contact_details, VCARD.fn, dataset['author'])
        assert self._triple(g, contact_details, VCARD.hasEmail, URIRef(dataset['author_email']))
    def test_publisher_no_uri(self):
        dataset = {
            "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
            "name": "test-dataset",
            "extras": [{"key": "publisher_name", "value": "Example Publisher"}],
        }
        extras = self._extras(dataset)

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        publisher = self._triple(g, dataset_ref, DCT.publisher, None)[2]
        assert publisher
        assert_true(isinstance(publisher, BNode))

        assert self._triple(g, publisher, RDF.type, FOAF.Organization)
        assert self._triple(g, publisher, FOAF.name, extras["publisher_name"])
    def test_publisher_org(self):
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'organization': {
                'id': '',
                'name': 'publisher1',
                'title': 'Example Publisher from Org',
            }
        }

        s = RDFSerializer(profiles=['schemaorg'])
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        publisher = self._triple(g, dataset_ref, SCHEMA.publisher, None)[2]
        assert publisher
        assert self._triple(g, publisher, RDF.type, SCHEMA.Organization)
        assert self._triple(g, publisher, SCHEMA.name, dataset['organization']['title'])
    def test_publisher_org(self):
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'organization': {
                'id': '',
                'name': 'publisher1',
                'title': 'Example Publisher from Org',
            }
        }

        s = RDFSerializer(profiles=['schemaorg'])
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        publisher = self._triple(g, dataset_ref, SCHEMA.publisher, None)[2]
        assert publisher
        assert self._triple(g, publisher, RDF.type, SCHEMA.Organization)
        assert self._triple(g, publisher, SCHEMA.name, dataset['organization']['title'])
示例#55
0
    def test_contact_details_no_duplicate_mailto(self):
        # tests that mailto: isn't added again if it is stored in the dataset
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'author': 'Example Author',
            'author_email': 'mailto:[email protected]',
        }

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        contact_details = self._triple(g, dataset_ref, DCAT.contactPoint,
                                       None)[2]
        assert contact_details
        assert_true(isinstance(contact_details, BNode))
        assert self._triple(g, contact_details, VCARD.fn, dataset['author'])
        assert self._triple(g, contact_details, VCARD.hasEmail,
                            URIRef(dataset['author_email']))
示例#56
0
    def test_publisher_no_uri(self):
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'extras': [
                {'key': 'publisher_name', 'value': 'Example Publisher'},
            ]
        }
        extras = self._extras(dataset)

        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        publisher = self._triple(g, dataset_ref, DCT.publisher, None)[2]
        assert publisher
        assert_true(isinstance(publisher, BNode))

        assert self._triple(g, publisher, RDF.type, FOAF.Organization)
        assert self._triple(g, publisher, FOAF.name, extras['publisher_name'])
示例#57
0
    def test_groups(self):
        dataset = {
            'id':
            '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name':
            'test-dataset',
            'groups': [
                {
                    'id': 'geography',
                    'name': 'geography',
                    'display_name': 'Geography',
                },
                {
                    'id': 'statistics',
                    'name': 'statistics',
                    'display_name': 'Statistics',
                },
            ]
        }

        s = RDFSerializer(profiles=['schemaorg'])
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        about = self._triples(g, dataset_ref, SCHEMA.about, None)
        assert len(about) == 2, 'There are not exactly 2 groups'

        names = []
        urls = []

        for item in about:
            names.append(str(g.value(item[2], SCHEMA.name)))
            urls.append(str(g.value(item[2], SCHEMA.url)))

        assert sorted(names), ['geography' == 'statistics']
        assert (sorted(urls) == [
            '{}/group/geography'.format(config['ckan.site_url'].rstrip('/')),
            '{}/group/statistics'.format(config['ckan.site_url'].rstrip('/'))
        ])
示例#58
0
    def test_spatial(self):
        dataset = {
            'id':
            '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name':
            'test-dataset',
            'extras': [
                {
                    'key': 'spatial_uri',
                    'value': 'http://sws.geonames.org/6361390/'
                },
                {
                    'key': 'spatial_text',
                    'value': 'Tarragona'
                },
                {
                    'key':
                    'spatial',
                    'value':
                    '{"type": "Polygon", "coordinates": [[[1.1870606,41.0786393],[1.1870606,41.1655218],[1.3752339,41.1655218],[1.3752339,41.0786393],[1.1870606,41.0786393]]]}'
                },
            ]
        }
        extras = self._extras(dataset)

        s = RDFSerializer(profiles=['schemaorg'])
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        spatial = self._triple(g, dataset_ref, SCHEMA.spatialCoverage, None)[2]
        assert spatial
        assert str(spatial) == extras['spatial_uri']
        assert self._triple(g, spatial, RDF.type, SCHEMA.Place)
        assert self._triple(g, spatial, SCHEMA.description,
                            extras['spatial_text'])
        geo = self._triple(g, spatial, SCHEMA.geo, None)[2]
        assert self._triple(g, geo, RDF.type, SCHEMA.GeoShape)
        assert self._triple(g, geo, SCHEMA.polygon, extras['spatial'])
    def test_license(self):
        
        def get_path(fname):
            return os.path.join(os.path.dirname(__file__),
                        '..', '..', '..', 'examples', fname)
        licenses = get_path('licenses.rdf')
        load_from_graph(path=licenses)
        Session.flush()


        dataset = {'title': 'some title',
                   'id': 'sometitle',
                   'resources': [
                            {
                                'id': 'resource/1111',
                                'uri': 'http://resource/1111',
                                'license_type': 'invalid',
                            },
                            {
                                'id': 'resource/2222',
                                'uri': 'http://resource/2222',
                                'license_type': 'https://w3id.org/italia/controlled-vocabulary/licences/A311_GFDL13'
                            }
                        ]
                    }
       

        p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])

        s = RDFSerializer()


        dataset_ref = s.graph_from_dataset(dataset)

        g = s.g

        r1 = URIRef(dataset['resources'][0]['uri'])
        r2 = URIRef(dataset['resources'][1]['uri'])

        unknown = License.get(License.DEFAULT_LICENSE)

        license_ref = g.value(r1, DCT.license)
        
        assert license_ref is not None
        assert str(license_ref) == unknown.uri,\
            "got license {}, instead of {}".format(license_ref, unknown.license_type)

        gpl = License.get(dataset['resources'][1]['license_type'])
        assert gpl is not None

        license_ref = g.value(r2, DCT.license)
        license_type = g.value(license_ref, DCT.type)
        
        assert license_ref is not None

        assert str(license_ref) == gpl.document_uri
        assert str(license_type) == gpl.license_type

        serialized = s.serialize_dataset(dataset)

        p.parse(serialized)
        datasets = list(p.datasets())
        assert len(datasets) == 1
        new_dataset = datasets[0]
        resources = new_dataset['resources']

        def _find_res(res_uri):
            for res in resources:
                if res_uri == res['uri']:
                    return res
            raise ValueError("No resource for {}".format(res_uri))

        new_res_unknown = _find_res(str(r1))
        new_res_gpl = _find_res(str(r2))

        assert new_res_unknown['license_type'] == unknown.uri, (new_res_unknown['license_type'], unknown.uri,)
        assert new_res_gpl['license_type'] == dataset['resources'][1]['license_type']
    def test_graph_from_dataset(self):

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Test DCAT dataset',
            'notes': 'Lorem ipsum',
            'url': 'http://example.com/ds1',
            'version': '1.0b',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'license_title': 'CC-BY 3.0',
            'license_url': 'http://creativecommons.org/licenses/by/3.0/',
            'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
            'extras': [
                {'key': 'alternate_identifier', 'value': '[\"xyz\", \"abc\"]'},
                {'key': 'identifier', 'value': '26be5452-fc5c-11e7-8450-fea9aa178066'},
                {'key': 'version_notes', 'value': 'This is a beta version'},
                {'key': 'frequency', 'value': 'monthly'},
                {'key': 'language', 'value': '[\"en\"]'},
                {'key': 'theme', 'value': '[\"http://eurovoc.europa.eu/100142\", \"http://eurovoc.europa.eu/100152\"]'},
                {'key': 'conforms_to', 'value': '[\"Standard 1\", \"Standard 2\"]'},
                {'key': 'access_rights', 'value': 'public'},
                {'key': 'documentation', 'value': '[\"http://dataset.info.org/doc1\", \"http://dataset.info.org/doc2\"]'},
                {'key': 'provenance', 'value': 'Some statement about provenance'},
                {'key': 'dcat_type', 'value': 'test-type'},
                {'key': 'related_resource', 'value': '[\"http://dataset.info.org/related1\", \"http://dataset.info.org/related2\"]'},
                {'key': 'has_version', 'value': '[\"https://data.some.org/catalog/datasets/derived-dataset-1\", \"https://data.some.org/catalog/datasets/derived-dataset-2\"]'},
                {'key': 'is_version_of', 'value': '[\"https://data.some.org/catalog/datasets/original-dataset\"]'},
                {'key': 'source', 'value': '[\"https://data.some.org/catalog/datasets/source-dataset-1\", \"https://data.some.org/catalog/datasets/source-dataset-2\"]'},
                {'key': 'sample', 'value': '[\"https://data.some.org/catalog/datasets/9df8df51-63db-37a8-e044-0003ba9b0d98/sample\"]'},
            ]
        }
        extras = self._extras(dataset)

        s = RDFSerializer(profiles=['schemaorg'])
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        eq_(unicode(dataset_ref), utils.dataset_uri(dataset))

        # Basic fields
        assert self._triple(g, dataset_ref, RDF.type, SCHEMA.Dataset)
        assert self._triple(g, dataset_ref, SCHEMA.name, dataset['title'])
        assert self._triple(g, dataset_ref, SCHEMA.description, dataset['notes'])
        assert self._triple(g, dataset_ref, SCHEMA.version, dataset['version'])
        assert self._triple(g, dataset_ref, SCHEMA.license, dataset['license_url'])
        assert self._triple(g, dataset_ref, SCHEMA.identifier, extras['identifier'])
        url = self._triple(g, dataset_ref, SCHEMA.url, None)[2]
        assert url
        eq_(url, Literal('http://test.ckan.net/dataset/%s' % dataset['name']))

        # Dates
        assert self._triple(g, dataset_ref, SCHEMA.datePublished, dataset['metadata_created'])
        assert self._triple(g, dataset_ref, SCHEMA.dateModified, dataset['metadata_modified'])

        # Tags
        eq_(len([t for t in g.triples((dataset_ref, SCHEMA.keywords, None))]), 2)
        for tag in dataset['tags']:
            assert self._triple(g, dataset_ref, SCHEMA.keywords, tag['name'])

        # List
        for item in [
            ('language', SCHEMA.inLanguage, Literal),
        ]:
            values = json.loads(extras[item[0]])
            eq_(len([t for t in g.triples((dataset_ref, item[1], None))]), len(values))
            for value in values:
                assert self._triple(g, dataset_ref, item[1], item[2](value))