def test_distribution_format_with_backslash(self):
    """A MIME-type-looking format value is serialized as dcat:mediaType."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'url': 'http://example.com/data/file.csv',
        'format': 'text/csv',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    assert self._triple(graph, distribution, DCAT.mediaType, resource['format'])
def test_spatial(self):
    """Spatial extras produce a dct:spatial node with label and two geometries."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'spatial_uri', 'value': 'http://sws.geonames.org/6361390/'},
            {'key': 'spatial_text', 'value': 'Tarragona'},
            {'key': 'spatial', 'value': '{"type": "Polygon", "coordinates": [[[1.1870606,41.0786393],[1.1870606,41.1655218],[1.3752339,41.1655218],[1.3752339,41.0786393],[1.1870606,41.0786393]]]}'},
        ],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    spatial = self._triple(graph, dataset_ref, DCT.spatial, None)[2]
    assert spatial
    # The node is identified by the spatial_uri extra
    eq_(unicode(spatial), extras['spatial_uri'])
    assert self._triple(graph, spatial, RDF.type, DCT.Location)
    assert self._triple(graph, spatial, SKOS.prefLabel, extras['spatial_text'])

    # Exactly two geometry serializations are attached
    eq_(len(list(graph.triples((spatial, LOCN.geometry, None)))), 2)
    # Geometry in GeoJSON
    assert self._triple(graph, spatial, LOCN.geometry, extras['spatial'], GEOJSON_IMT)
    # Geometry in WKT
    wkt_geom = wkt.dumps(json.loads(extras['spatial']), decimals=4)
    assert self._triple(graph, spatial, LOCN.geometry, wkt_geom, GSP.wktLiteral)
def test_spatial_bad_json_no_wkt(self):
    """Unparseable spatial JSON still yields a blank node, but no WKT geometry."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'spatial', 'value': 'NotJSON'},
        ],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    spatial = self._triple(graph, dataset_ref, DCT.spatial, None)[2]
    assert spatial
    # No spatial_uri extra, so the node must be anonymous
    assert isinstance(spatial, BNode)
    # Geometry in GeoJSON (kept verbatim)
    assert self._triple(graph, spatial, LOCN.geometry, extras['spatial'], GEOJSON_IMT)
    # Geometry in WKT is skipped — only the single GeoJSON triple exists
    assert len(list(graph.triples((spatial, LOCN.geometry, None)))) == 1
def test_distribution_format_with_backslash(self):
    """A MIME-type-looking format value is serialized as dcat:mediaType."""
    resource = {
        "id": "c041c635-054f-4431-b647-f9186926d021",
        "package_id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "CSV file",
        "url": "http://example.com/data/file.csv",
        "format": "text/csv",
    }
    dataset = {
        "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "test-dataset",
        "title": "Test DCAT dataset",
        "resources": [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    assert self._triple(graph, distribution, DCAT.mediaType, resource["format"])
def test_contact_details_extras(self):
    """contact_* extras take precedence over maintainer/author fields."""
    dataset = {
        "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "test-dataset",
        "maintainer": "Example Maintainer",
        "maintainer_email": "*****@*****.**",
        "author": "Example Author",
        "author_email": "*****@*****.**",
        "extras": [
            {"key": "contact_uri", "value": "http://example.com/contact"},
            {"key": "contact_name", "value": "Example Contact"},
            {"key": "contact_email", "value": "*****@*****.**"},
        ],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    # Contact details come from the extras, not the core fields
    contact_details = self._triple(graph, dataset_ref, DCAT.contactPoint, None)[2]
    assert contact_details
    eq_(unicode(contact_details), extras["contact_uri"])
    assert self._triple(graph, contact_details, VCARD.fn, extras["contact_name"])
    assert self._triple(graph, contact_details, VCARD.hasEmail, extras["contact_email"])
def test_hash_algorithm_not_uri(self):
    """A non-URI hash algorithm is still emitted as spdx:algorithm."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'hash': 'aaaa',
        'hash_algorithm': 'sha1',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    checksum = self._triple(graph, distribution, SPDX.checksum, None)[2]
    assert checksum
    assert self._triple(graph, checksum, RDF.type, SPDX.Checksum)
    # The hash value is typed as hexBinary
    assert self._triple(
        graph, checksum, SPDX.checksumValue, resource['hash'],
        data_type='http://www.w3.org/2001/XMLSchema#hexBinary')
    assert self._triple(graph, checksum, SPDX.algorithm, resource['hash_algorithm'])
def test_distribution_size_not_number(self):
    """A non-numeric size is passed through verbatim as dcat:byteSize."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'size': 'aaaa',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    assert self._triple(graph, distribution, DCAT.byteSize, resource['size'])
def test_publisher_extras(self):
    """publisher_* extras take precedence over the owning organization."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'organization': {
            'id': '',
            'name': 'publisher1',
            'title': 'Example Publisher from Org',
        },
        'extras': [
            {'key': 'publisher_uri', 'value': 'http://example.com/publisher'},
            {'key': 'publisher_name', 'value': 'Example Publisher'},
            {'key': 'publisher_email', 'value': '*****@*****.**'},
            {'key': 'publisher_url', 'value': 'http://example.com/publisher/home'},
            {'key': 'publisher_type', 'value': 'http://purl.org/adms/publishertype/Company'},
        ],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    publisher = self._triple(graph, dataset_ref, DCT.publisher, None)[2]
    assert publisher
    eq_(unicode(publisher), extras['publisher_uri'])

    assert self._triple(graph, publisher, RDF.type, FOAF.Organization)
    assert self._triple(graph, publisher, FOAF.name, extras['publisher_name'])
    assert self._triple(graph, publisher, FOAF.mbox, extras['publisher_email'])
    assert self._triple(graph, publisher, FOAF.homepage, URIRef(extras['publisher_url']))
    assert self._triple(graph, publisher, DCT.type, extras['publisher_type'])
def test_distribution_both_urls_different(self):
    """Distinct url/download_url map to schema:url and schema:contentUrl."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'url': 'http://example.com/data/file',
        'download_url': 'http://example.com/data/file.csv',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, SCHEMA.distribution, None)[2]
    assert self._triple(graph, distribution, SCHEMA.url, resource['url'])
    assert self._triple(graph, distribution, SCHEMA.contentUrl, resource['download_url'])
def test_publisher_extras(self):
    """publisher_* extras take precedence over the owning organization."""
    dataset = {
        "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "test-dataset",
        "organization": {
            "id": "",
            "name": "publisher1",
            "title": "Example Publisher from Org",
        },
        "extras": [
            {"key": "publisher_uri", "value": "http://example.com/publisher"},
            {"key": "publisher_name", "value": "Example Publisher"},
            {"key": "publisher_email", "value": "*****@*****.**"},
            {"key": "publisher_url", "value": "http://example.com/publisher/home"},
            {"key": "publisher_type", "value": "http://purl.org/adms/publishertype/Company"},
        ],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    publisher = self._triple(graph, dataset_ref, DCT.publisher, None)[2]
    assert publisher
    eq_(unicode(publisher), extras["publisher_uri"])

    assert self._triple(graph, publisher, RDF.type, FOAF.Organization)
    assert self._triple(graph, publisher, FOAF.name, extras["publisher_name"])
    assert self._triple(graph, publisher, FOAF.mbox, extras["publisher_email"])
    assert self._triple(graph, publisher, FOAF.homepage, URIRef(extras["publisher_url"]))
    assert self._triple(graph, publisher, DCT.type, extras["publisher_type"])
def test_distribution_both_urls_the_same(self):
    """Identical url/download_url collapse to schema:contentUrl only."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'url': 'http://example.com/data/file.csv',
        'download_url': 'http://example.com/data/file.csv',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, SCHEMA.distribution, None)[2]
    assert self._triple(graph, distribution, SCHEMA.contentUrl, resource['url'])
    # No separate schema:url when both URLs are the same
    assert self._triple(graph, distribution, SCHEMA.url, None) is None
def test_temporal(self):
    """temporal_start/temporal_end extras become a dct:PeriodOfTime node."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'temporal_start', 'value': '2015-06-26T15:21:09.075774'},
            {'key': 'temporal_end', 'value': '2015-07-14'},
        ],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    temporal = self._triple(graph, dataset_ref, DCT.temporal, None)[2]
    assert temporal
    assert self._triple(graph, temporal, RDF.type, DCT.PeriodOfTime)
    # Both dates are normalised to ISO format and typed xsd:dateTime
    assert self._triple(graph, temporal, SCHEMA.startDate,
                        parse_date(extras['temporal_start']).isoformat(), XSD.dateTime)
    assert self._triple(graph, temporal, SCHEMA.endDate,
                        parse_date(extras['temporal_end']).isoformat(), XSD.dateTime)
def test_publisher_extras(self):
    """publisher_* extras map to schema:publisher with a nested ContactPoint."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'organization': {
            'id': '',
            'name': 'publisher1',
            'title': 'Example Publisher from Org',
        },
        'extras': [
            {'key': 'publisher_uri', 'value': 'http://example.com/publisher'},
            {'key': 'publisher_name', 'value': 'Example Publisher'},
            {'key': 'publisher_email', 'value': '*****@*****.**'},
            {'key': 'publisher_url', 'value': 'http://example.com/publisher/home'},
            {'key': 'publisher_type', 'value': 'http://purl.org/adms/publishertype/Company'},
        ],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    publisher = self._triple(graph, dataset_ref, SCHEMA.publisher, None)[2]
    assert publisher
    eq_(unicode(publisher), extras['publisher_uri'])

    assert self._triple(graph, publisher, RDF.type, SCHEMA.Organization)
    assert self._triple(graph, publisher, SCHEMA.name, extras['publisher_name'])

    # The publisher carries a ContactPoint with the same details
    contact_point = self._triple(graph, publisher, SCHEMA.contactPoint, None)[2]
    assert contact_point
    assert self._triple(graph, contact_point, RDF.type, SCHEMA.ContactPoint)
    assert self._triple(graph, contact_point, SCHEMA.name, extras['publisher_name'])
    assert self._triple(graph, contact_point, SCHEMA.email, extras['publisher_email'])
    assert self._triple(graph, contact_point, SCHEMA.url, extras['publisher_url'])
    assert self._triple(graph, contact_point, SCHEMA.contactType, 'customer service')
def test_temporal_start_and_end(self):
    """Start and end extras are joined into a single ISO interval string."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'temporal_start', 'value': '2015-06-26T15:21:09.075774'},
            {'key': 'temporal_end', 'value': '2015-07-14'},
        ],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    assert self._triple(graph, dataset_ref, SCHEMA.temporalCoverage,
                        '2015-06-26T15:21:09.075774/2015-07-14')
def test_graph_from_dataset(self):
    """Core dataset fields, tags, dates and list extras are all serialized."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'notes': 'Lorem ipsum',
        'url': 'http://example.com/ds1',
        'version': '1.0b',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
        'extras': [
            {'key': 'alternate_identifier', 'value': 'xyz'},
            {'key': 'version_notes', 'value': 'This is a beta version'},
            {'key': 'frequency', 'value': 'monthly'},
            {'key': 'language', 'value': '["en"]'},
            {'key': 'theme', 'value': '["http://eurovoc.europa.eu/100142", "http://eurovoc.europa.eu/100152"]'},
            {'key': 'conforms_to', 'value': '["Standard 1", "Standard 2"]'},
        ],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    eq_(unicode(dataset_ref), utils.dataset_uri(dataset))

    # Basic fields
    assert self._triple(graph, dataset_ref, RDF.type, DCAT.Dataset)
    assert self._triple(graph, dataset_ref, DCT.title, dataset['title'])
    assert self._triple(graph, dataset_ref, DCT.description, dataset['notes'])
    assert self._triple(graph, dataset_ref, OWL.versionInfo, dataset['version'])
    assert self._triple(graph, dataset_ref, ADMS.versionNotes, extras['version_notes'])
    assert self._triple(graph, dataset_ref, ADMS.identifier, extras['alternate_identifier'])
    assert self._triple(graph, dataset_ref, DCT.accrualPeriodicity, extras['frequency'])

    # Tags
    eq_(len(list(graph.triples((dataset_ref, DCAT.keyword, None)))), 2)
    for tag in dataset['tags']:
        assert self._triple(graph, dataset_ref, DCAT.keyword, tag['name'])

    # Dates
    assert self._triple(graph, dataset_ref, DCT.issued,
                        dataset['metadata_created'], XSD.dateTime)
    assert self._triple(graph, dataset_ref, DCT.modified,
                        dataset['metadata_modified'], XSD.dateTime)

    # JSON-list extras: one triple per decoded value
    for key, predicate in (
        ('language', DCT.language),
        ('theme', DCAT.theme),
        ('conforms_to', DCAT.conformsTo),
    ):
        values = json.loads(extras[key])
        eq_(len(list(graph.triples((dataset_ref, predicate, None)))), len(values))
        for value in values:
            assert self._triple(graph, dataset_ref, predicate, value)
def test_distribution_both_urls_the_same(self):
    """Identical url/download_url collapse to dcat:downloadURL only."""
    resource = {
        "id": "c041c635-054f-4431-b647-f9186926d021",
        "package_id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "CSV file",
        "url": "http://example.com/data/file.csv",
        "download_url": "http://example.com/data/file.csv",
    }
    dataset = {
        "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "test-dataset",
        "title": "Test DCAT dataset",
        "resources": [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    assert self._triple(graph, distribution, DCAT.downloadURL, URIRef(resource["url"]))
    # No separate dcat:accessURL when both URLs are the same
    assert self._triple(graph, distribution, DCAT.accessURL, None) is None
def test_distribution_format(self):
    """format and mimetype map to schema:encodingFormat and schema:fileType."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'url': 'http://example.com/data/file.csv',
        'format': 'CSV',
        'mimetype': 'text/csv',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, SCHEMA.distribution, None)[2]
    assert self._triple(graph, distribution, SCHEMA.encodingFormat, resource['format'])
    assert self._triple(graph, distribution, SCHEMA.fileType, resource['mimetype'])
def test_identifier_extra(self):
    """The identifier extra wins over a guid extra for dct:identifier."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'identifier', 'value': 'idxxx'},
            {'key': 'guid', 'value': 'guidyyy'},
        ],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    assert self._triple(graph, dataset_ref, DCT.identifier, extras['identifier'])
def test_distribution_format_with_mimetype_fallback(self):
    """With an empty format, the mimetype is used as schema:encodingFormat."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'url': 'http://example.com/data/file.csv',
        'format': '',
        'mimetype': 'text/csv',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, SCHEMA.distribution, None)[2]
    assert self._triple(graph, distribution, SCHEMA.encodingFormat, resource['mimetype'])
def _build_graph_and_check_format_mediatype(self, dataset_dict, expected_format, expected_mediatype):
    """Serialize *dataset_dict* and compare the first distribution's
    dct:format and dcat:mediaType objects with the expected lists.

    :param dataset_dict: dataset dict, expected to contain one resource
    :param expected_format: expected list of dct:format items in the resource
    :param expected_mediatype: expected list of dcat:mediaType items in the resource
    """
    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset_dict)

    # The graph should contain the expected nodes on the first distribution
    resource_ref = list(graph.objects(dataset_ref, DCAT.distribution))[0]
    dct_format = list(graph.objects(resource_ref, DCT['format']))
    dcat_mediatype = list(graph.objects(resource_ref, DCAT.mediaType))
    assert expected_format == dct_format
    assert expected_mediatype == dcat_mediatype
def test_contact_details_extras(self):
    """contact_* extras take precedence over maintainer/author fields."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'maintainer': 'Example Maintainer',
        'maintainer_email': '*****@*****.**',
        'author': 'Example Author',
        'author_email': '*****@*****.**',
        'extras': [
            {'key': 'contact_uri', 'value': 'http://example.com/contact'},
            {'key': 'contact_name', 'value': 'Example Contact'},
            {'key': 'contact_email', 'value': '*****@*****.**'},
        ],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    # Contact details come from the extras, not the core fields
    contact_details = self._triple(graph, dataset_ref, DCAT.contactPoint, None)[2]
    assert contact_details
    eq_(unicode(contact_details), extras['contact_uri'])
    assert self._triple(graph, contact_details, VCARD.fn, extras['contact_name'])
    assert self._triple(graph, contact_details, VCARD.hasEmail, extras['contact_email'])
def test_hash_algorithm_not_uri(self):
    """A non-URI hash algorithm is still emitted as spdx:algorithm."""
    resource = {
        "id": "c041c635-054f-4431-b647-f9186926d021",
        "package_id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "CSV file",
        "hash": "aaaa",
        "hash_algorithm": "sha1",
    }
    dataset = {
        "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "test-dataset",
        "title": "Test DCAT dataset",
        "resources": [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    checksum = self._triple(graph, distribution, SPDX.checksum, None)[2]
    assert checksum
    # The hash value is typed as hexBinary
    assert self._triple(
        graph, checksum, SPDX.checksumValue, resource["hash"],
        data_type="http://www.w3.org/2001/XMLSchema#hexBinary"
    )
    assert self._triple(graph, checksum, SPDX.algorithm, resource["hash_algorithm"])
def test_distribution_both_urls_the_same(self):
    """Identical url/download_url collapse to dcat:downloadURL only."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'url': 'http://example.com/data/file.csv',
        'download_url': 'http://example.com/data/file.csv',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    assert self._triple(graph, distribution, DCAT.downloadURL, URIRef(resource['url']))
    # No separate dcat:accessURL when both URLs are the same
    assert self._triple(graph, distribution, DCAT.accessURL, None) is None
def test_hash_algorithm_not_uri(self):
    """A non-URI hash algorithm is still emitted as spdx:algorithm."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'hash': 'aaaa',
        'hash_algorithm': 'sha1',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    checksum = self._triple(graph, distribution, SPDX.checksum, None)[2]
    assert checksum
    # The hash value is typed as hexBinary
    assert self._triple(graph, checksum, SPDX.checksumValue, resource['hash'],
                        data_type='http://www.w3.org/2001/XMLSchema#hexBinary')
    assert self._triple(graph, checksum, SPDX.algorithm, resource['hash_algorithm'])
def test_spatial(self):
    """Spatial extras map to schema:Place with a GeoShape polygon."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'spatial_uri', 'value': 'http://sws.geonames.org/6361390/'},
            {'key': 'spatial_text', 'value': 'Tarragona'},
            {'key': 'spatial', 'value': '{"type": "Polygon", "coordinates": [[[1.1870606,41.0786393],[1.1870606,41.1655218],[1.3752339,41.1655218],[1.3752339,41.0786393],[1.1870606,41.0786393]]]}'},
        ],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    spatial = self._triple(graph, dataset_ref, SCHEMA.spatialCoverage, None)[2]
    assert spatial
    eq_(unicode(spatial), extras['spatial_uri'])
    assert self._triple(graph, spatial, RDF.type, SCHEMA.Place)
    assert self._triple(graph, spatial, SCHEMA.description, extras['spatial_text'])

    # The raw GeoJSON string ends up as the polygon of a GeoShape node
    geo = self._triple(graph, spatial, SCHEMA.geo, None)[2]
    assert self._triple(graph, geo, RDF.type, SCHEMA.GeoShape)
    assert self._triple(graph, geo, SCHEMA.polygon, extras['spatial'])
def test_distribution_both_urls_different_with_access_url(self):
    """access_url and download_url map to their respective DCAT predicates."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'access_url': 'http://example.com/data/file',
        'download_url': 'http://example.com/data/file.csv',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    assert self._triple(graph, distribution, DCAT.accessURL, URIRef(resource['access_url']))
    assert self._triple(graph, distribution, DCAT.downloadURL, URIRef(resource['download_url']))
def test_distribution_fields(self):
    """All resource fields are serialized onto the schema:DataDownload node."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'description': 'A CSV file',
        'url': 'http://example.com/data/file.csv',
        'status': 'http://purl.org/adms/status/Completed',
        'rights': 'Some statement about rights',
        'license': 'http://creativecommons.org/licenses/by/3.0/',
        'issued': '2015-06-26T15:21:09.034694',
        'modified': '2015-06-26T15:21:09.075774',
        'size': 1234,
        'language': '["en", "es", "ca"]',
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    eq_(len(list(graph.triples((dataset_ref, SCHEMA.distribution, None)))), 1)

    # URI
    distribution = self._triple(graph, dataset_ref, SCHEMA.distribution, None)[2]
    eq_(unicode(distribution), utils.resource_uri(resource))

    # Basic fields
    assert self._triple(graph, distribution, RDF.type, SCHEMA.DataDownload)
    assert self._triple(graph, distribution, SCHEMA.name, resource['name'])
    assert self._triple(graph, distribution, SCHEMA.description, resource['description'])
    assert self._triple(graph, distribution, SCHEMA.license, resource['license'])

    # JSON-list fields: one triple per decoded value
    for key, predicate in (
        ('language', SCHEMA.inLanguage),
    ):
        values = json.loads(resource[key])
        eq_(len(list(graph.triples((distribution, predicate, None)))), len(values))
        for value in values:
            assert self._triple(graph, distribution, predicate, value)

    # Dates
    assert self._triple(graph, distribution, SCHEMA.datePublished, resource['issued'])
    assert self._triple(graph, distribution, SCHEMA.dateModified, resource['modified'])

    # Numbers
    assert self._triple(graph, distribution, SCHEMA.contentSize, resource['size'])
def test_identifier_id(self):
    """Without identifier/guid extras, the dataset id is the dct:identifier."""
    dataset = {
        "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "test-dataset",
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    assert self._triple(graph, dataset_ref, DCT.identifier, dataset["id"])
def test_distribution_fields(self):
    """All resource fields are serialized onto the dcat:Distribution node."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'description': 'A CSV file',
        'url': 'http://example.com/data/file.csv',
        'status': 'http://purl.org/adms/status/Completed',
        'rights': 'Some statement about rights',
        'license': 'http://creativecommons.org/licenses/by/3.0/',
        'issued': '2015-06-26T15:21:09.034694',
        'modified': '2015-06-26T15:21:09.075774',
        'size': 1234,
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    eq_(len(list(graph.triples((dataset_ref, DCAT.distribution, None)))), 1)

    # URI
    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    eq_(unicode(distribution), utils.resource_uri(resource))

    # Basic fields
    assert self._triple(graph, distribution, RDF.type, DCAT.Distribution)
    assert self._triple(graph, distribution, DCT.title, resource['name'])
    assert self._triple(graph, distribution, DCT.description, resource['description'])
    assert self._triple(graph, distribution, DCT.rights, resource['rights'])
    assert self._triple(graph, distribution, DCT.license, resource['license'])
    assert self._triple(graph, distribution, ADMS.status, resource['status'])

    # Dates
    assert self._triple(graph, distribution, DCT.issued, resource['issued'], XSD.dateTime)
    assert self._triple(graph, distribution, DCT.modified, resource['modified'], XSD.dateTime)

    # Numbers: the size is coerced to a decimal literal
    assert self._triple(graph, distribution, DCAT.byteSize, float(resource['size']), XSD.decimal)
def test_graph_from_dataset(self):
    """DCAT-AP_IT serialization covers the basic fields and the keywords."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Dataset di test DCAT_AP-IT',
        'notes': 'dcatapit dataset di test',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
        'issued': '2016-11-29',
        'modified': '2016-11-29',
        'identifier': 'ISBN',
        'temporal_start': '2016-11-01',
        'temporal_end': '2016-11-30',
        'frequency': 'UPDATE_CONT',
        'publisher_name': 'bolzano',
        'publisher_identifier': '234234234',
        'creator_name': 'test',
        'creator_identifier': '412946129',
        'holder_name': 'bolzano',
        'holder_identifier': '234234234',
        'alternate_identifier': 'ISBN,TEST',
        'theme': '{ECON,ENVI}',
        'geographical_geonames_url': 'http://www.geonames.org/3181913',
        'language': '{DEU,ENG,ITA}',
        'is_version_of': 'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
        'conforms_to': '{CONF1,CONF2,CONF3}',
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    eq_(unicode(dataset_ref), utils.dataset_uri(dataset))

    # Basic fields
    assert self._triple(graph, dataset_ref, RDF.type, DCATAPIT.Dataset)
    assert self._triple(graph, dataset_ref, DCT.title, dataset['title'])
    assert self._triple(graph, dataset_ref, DCT.description, dataset['notes'])
    assert self._triple(graph, dataset_ref, DCT.identifier, dataset['identifier'])

    # Tags
    eq_(len(list(graph.triples((dataset_ref, DCAT.keyword, None)))), 2)
    for tag in dataset['tags']:
        assert self._triple(graph, dataset_ref, DCAT.keyword, tag['name'])
def test_identifier_id(self):
    """Without identifier/guid extras, the dataset id is the dct:identifier."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    assert self._triple(graph, dataset_ref, DCT.identifier, dataset['id'])
def test_distribution_fields(self):
    """All resource fields are serialized onto the dcat:Distribution node."""
    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'CSV file',
        'description': 'A CSV file',
        'url': 'http://example.com/data/file.csv',
        'status': 'http://purl.org/adms/status/Completed',
        'rights': 'Some statement about rights',
        'license': 'http://creativecommons.org/licenses/by/3.0/',
        'issued': '2015-06-26T15:21:09.034694',
        'modified': '2015-06-26T15:21:09.075774',
        'size': 1234,
    }
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [resource],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    eq_(len(list(graph.triples((dataset_ref, DCAT.distribution, None)))), 1)

    # URI
    distribution = self._triple(graph, dataset_ref, DCAT.distribution, None)[2]
    eq_(unicode(distribution), utils.resource_uri(resource))

    # Basic fields
    assert self._triple(graph, distribution, RDF.type, DCAT.Distribution)
    assert self._triple(graph, distribution, DCT.title, resource['name'])
    assert self._triple(graph, distribution, DCT.description, resource['description'])
    assert self._triple(graph, distribution, DCT.rights, resource['rights'])
    assert self._triple(graph, distribution, DCT.license, resource['license'])
    assert self._triple(graph, distribution, ADMS.status, resource['status'])

    # Dates
    assert self._triple(graph, distribution, DCT.issued, resource['issued'], XSD.dateTime)
    assert self._triple(graph, distribution, DCT.modified, resource['modified'], XSD.dateTime)

    # Numbers: the size is coerced to a decimal literal
    assert self._triple(graph, distribution, DCAT.byteSize, float(resource['size']), XSD.decimal)
def test_identifier_guid(self):
    """A guid extra is used as dct:identifier when no identifier extra exists."""
    dataset = {
        "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "test-dataset",
        "extras": [{"key": "guid", "value": "guidyyy"}],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    assert self._triple(graph, dataset_ref, DCT.identifier, extras["guid"])
def test_alternate_identifier_numeric(self):
    """A numeric-looking alternate_identifier does not replace dct:identifier."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'alternate_identifier', 'value': '1.0'},
        ],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    # The dataset id is still the identifier
    assert self._triple(graph, dataset_ref, DCT.identifier, dataset['id'])
def test_catalog(self):
    """The dataset is linked to a schema:DataCatalog describing the site."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
    }

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    data_catalog = self._triple(graph, dataset_ref, SCHEMA.includedInDataCatalog, None)[2]
    assert data_catalog
    assert self._triple(graph, data_catalog, RDF.type, SCHEMA.DataCatalog)
    assert self._triple(graph, data_catalog, SCHEMA.url, 'http://ckan.example.org')
    assert self._triple(graph, data_catalog, SCHEMA.name, 'ckan.example.org')
    assert self._triple(graph, data_catalog, SCHEMA.description, 'CKAN Portal')
def test_temporal_start_only(self):
    """With only a start date, schema:temporalCoverage holds just that date."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'temporal_start', 'value': '2015-06-26T15:21:09.075774'},
        ],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    assert self._triple(graph, dataset_ref, SCHEMA.temporalCoverage,
                        parse_date(extras['temporal_start']).isoformat())
def test_identifier_guid(self):
    """A guid extra is used as dct:identifier when no identifier extra exists."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'guid', 'value': 'guidyyy'},
        ],
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    assert self._triple(graph, dataset_ref, DCT.identifier, extras['guid'])
def test_publisher_org(self):
    """Without publisher_* extras, the organization title becomes the publisher."""
    dataset = {
        "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "test-dataset",
        "organization": {
            "id": "",
            "name": "publisher1",
            "title": "Example Publisher from Org",
        },
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    publisher = self._triple(graph, dataset_ref, DCT.publisher, None)[2]
    assert publisher
    assert self._triple(graph, publisher, RDF.type, FOAF.Organization)
    assert self._triple(graph, publisher, FOAF.name, dataset["organization"]["title"])
def test_distributions(self):
    """Each resource becomes its own dcat:Distribution node with a title."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'resources': [
            {
                'id': 'c041c635-054f-4431-b647-f9186926d021',
                'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
                'name': 'CSV file',
            },
            {
                'id': '8bceeda9-0084-477f-aa33-dad6148900d5',
                'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
                'name': 'XLS file',
            },
            {
                'id': 'da73d939-0f11-45a1-9733-5de108383133',
                'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
                'name': 'PDF file',
            },
        ],
    }

    serializer = RDFSerializer()
    graph = serializer.g
    dataset_ref = serializer.graph_from_dataset(dataset)

    eq_(len(list(graph.triples((dataset_ref, DCAT.distribution, None)))), 3)

    for resource in dataset['resources']:
        # Look up each distribution by its computed resource URI
        distribution = self._triple(
            graph, dataset_ref, DCAT.distribution,
            URIRef(utils.resource_uri(resource)))[2]

        assert self._triple(graph, distribution, RDF.type, DCAT.Distribution)
        assert self._triple(graph, distribution, DCT.title, resource['name'])
def test_contact_details_author(self):
    # Dataset author/author_email map to a vcard contact point (dcat:contactPoint).
    # NOTE(review): an identical test with the same name immediately follows
    # this one; if both are in the same class the later definition shadows
    # this one and it never runs -- confirm and deduplicate.
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'author': 'Example Author',
        'author_email': '*****@*****.**',
    }
    s = RDFSerializer()
    g = s.g
    dataset_ref = s.graph_from_dataset(dataset)
    # The contact point is emitted as a blank node.
    contact_details = self._triple(g, dataset_ref, DCAT.contactPoint, None)[2]
    assert contact_details
    assert_true(isinstance(contact_details, BNode))
    assert self._triple(g, contact_details, VCARD.fn, dataset['author'])
    assert self._triple(g, contact_details, VCARD.hasEmail, dataset['author_email'])
def test_contact_details_author(self):
    """Author fields are exposed as a vcard contact point (blank node)."""
    dataset = {
        "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "test-dataset",
        "author": "Example Author",
        "author_email": "*****@*****.**",
    }

    serializer = RDFSerializer()
    graph = serializer.g

    dataset_ref = serializer.graph_from_dataset(dataset)

    contact = self._triple(graph, dataset_ref, DCAT.contactPoint, None)[2]
    assert contact
    assert_true(isinstance(contact, BNode))
    assert self._triple(graph, contact, VCARD.fn, dataset["author"])
    assert self._triple(graph, contact, VCARD.hasEmail, dataset["author_email"])
def test_contact_details_no_duplicate_mailto(self):
    """An author_email already carrying mailto: is kept as-is (URIRef, no second prefix)."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'author': 'Example Author',
        'author_email': 'mailto:[email protected]',
    }

    serializer = RDFSerializer()
    graph = serializer.g

    dataset_ref = serializer.graph_from_dataset(dataset)

    contact = self._triple(graph, dataset_ref, DCAT.contactPoint, None)[2]
    assert contact
    assert_true(isinstance(contact, BNode))
    assert self._triple(graph, contact, VCARD.fn, dataset['author'])
    # The stored mailto: prefix must not be duplicated by the serializer.
    assert self._triple(graph, contact, VCARD.hasEmail,
                        URIRef(dataset['author_email']))
def test_publisher_no_uri(self):
    # With no publisher_uri, the publisher is a blank-node foaf:Organization
    # named from the publisher_name extra.
    # NOTE(review): a near-identical test with the same name appears later in
    # the file; if both are in the same class the later definition shadows
    # this one -- confirm and deduplicate.
    dataset = {
        "id": "4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6",
        "name": "test-dataset",
        "extras": [{"key": "publisher_name", "value": "Example Publisher"}],
    }
    extras = self._extras(dataset)
    s = RDFSerializer()
    g = s.g
    dataset_ref = s.graph_from_dataset(dataset)
    publisher = self._triple(g, dataset_ref, DCT.publisher, None)[2]
    assert publisher
    assert_true(isinstance(publisher, BNode))
    assert self._triple(g, publisher, RDF.type, FOAF.Organization)
    assert self._triple(g, publisher, FOAF.name, extras["publisher_name"])
def test_publisher_org(self):
    """schema.org profile: the owning org becomes schema:publisher (schema:Organization)."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'organization': {
            'id': '',
            'name': 'publisher1',
            'title': 'Example Publisher from Org',
        }
    }

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g

    dataset_ref = serializer.graph_from_dataset(dataset)

    publisher = self._triple(graph, dataset_ref, SCHEMA.publisher, None)[2]
    assert publisher
    assert self._triple(graph, publisher, RDF.type, SCHEMA.Organization)
    assert self._triple(graph, publisher, SCHEMA.name,
                        dataset['organization']['title'])
def test_publisher_no_uri(self):
    """Without publisher_uri, the publisher is a blank node named from publisher_name."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'publisher_name', 'value': 'Example Publisher'},
        ]
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer()
    graph = serializer.g

    dataset_ref = serializer.graph_from_dataset(dataset)

    publisher = self._triple(graph, dataset_ref, DCT.publisher, None)[2]
    assert publisher
    assert_true(isinstance(publisher, BNode))
    assert self._triple(graph, publisher, RDF.type, FOAF.Organization)
    assert self._triple(graph, publisher, FOAF.name, extras['publisher_name'])
def test_groups(self):
    """schema.org profile: CKAN groups map to schema:about nodes with name and url."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'groups': [
            {
                'id': 'geography',
                'name': 'geography',
                'display_name': 'Geography',
            },
            {
                'id': 'statistics',
                'name': 'statistics',
                'display_name': 'Statistics',
            },
        ]
    }
    s = RDFSerializer(profiles=['schemaorg'])
    g = s.g

    dataset_ref = s.graph_from_dataset(dataset)

    about = self._triples(g, dataset_ref, SCHEMA.about, None)
    assert len(about) == 2, 'There are not exactly 2 groups'

    names = []
    urls = []
    for item in about:
        names.append(str(g.value(item[2], SCHEMA.name)))
        urls.append(str(g.value(item[2], SCHEMA.url)))

    # BUG FIX: the original line was
    #   assert sorted(names), ['geography' == 'statistics']
    # which parses as `assert <non-empty list>, <message>` and could never
    # fail; the intended equality check is restored here.
    assert sorted(names) == ['geography', 'statistics']
    assert (sorted(urls) == [
        '{}/group/geography'.format(config['ckan.site_url'].rstrip('/')),
        '{}/group/statistics'.format(config['ckan.site_url'].rstrip('/'))
    ])
def test_spatial(self):
    """schema.org profile: spatial extras become a schema:Place with a GeoShape polygon."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'spatial_uri',
             'value': 'http://sws.geonames.org/6361390/'},
            {'key': 'spatial_text',
             'value': 'Tarragona'},
            {'key': 'spatial',
             'value': '{"type": "Polygon", "coordinates": [[[1.1870606,41.0786393],[1.1870606,41.1655218],[1.3752339,41.1655218],[1.3752339,41.0786393],[1.1870606,41.0786393]]]}'},
        ]
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g

    dataset_ref = serializer.graph_from_dataset(dataset)

    spatial = self._triple(graph, dataset_ref, SCHEMA.spatialCoverage, None)[2]
    assert spatial
    # The spatial_uri extra becomes the node's URI.
    assert str(spatial) == extras['spatial_uri']
    assert self._triple(graph, spatial, RDF.type, SCHEMA.Place)
    assert self._triple(graph, spatial, SCHEMA.description, extras['spatial_text'])

    geo = self._triple(graph, spatial, SCHEMA.geo, None)[2]
    assert self._triple(graph, geo, RDF.type, SCHEMA.GeoShape)
    # The raw GeoJSON string is kept verbatim as the polygon value.
    assert self._triple(graph, geo, SCHEMA.polygon, extras['spatial'])
def test_license(self):
    # End-to-end check of resource licence handling: serialize a dataset
    # whose resources carry an invalid and a valid license_type, then parse
    # the result back and verify the licence survives the round trip.
    def get_path(fname):
        # Path to a file inside the package's examples/ directory.
        return os.path.join(os.path.dirname(__file__), '..', '..', '..', 'examples', fname)
    licenses = get_path('licenses.rdf')
    # Load the licences vocabulary into the DB so License.get() can resolve it.
    load_from_graph(path=licenses)
    Session.flush()
    dataset = {'title': 'some title',
               'id': 'sometitle',
               'resources': [
                   {
                       'id': 'resource/1111',
                       'uri': 'http://resource/1111',
                       'license_type': 'invalid',
                   },
                   {
                       'id': 'resource/2222',
                       'uri': 'http://resource/2222',
                       'license_type': 'https://w3id.org/italia/controlled-vocabulary/licences/A311_GFDL13'
                   }
               ]
               }
    p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])
    s = RDFSerializer()
    dataset_ref = s.graph_from_dataset(dataset)
    g = s.g
    r1 = URIRef(dataset['resources'][0]['uri'])
    r2 = URIRef(dataset['resources'][1]['uri'])
    # An invalid license_type falls back to the default ("unknown") licence.
    unknown = License.get(License.DEFAULT_LICENSE)
    license_ref = g.value(r1, DCT.license)
    assert license_ref is not None
    assert str(license_ref) == unknown.uri,\
        "got license {}, instead of {}".format(license_ref, unknown.license_type)
    gpl = License.get(dataset['resources'][1]['license_type'])
    assert gpl is not None
    # A valid licence is emitted as its document URI, typed with the licence type.
    license_ref = g.value(r2, DCT.license)
    license_type = g.value(license_ref, DCT.type)
    assert license_ref is not None
    assert str(license_ref) == gpl.document_uri
    assert str(license_type) == gpl.license_type
    # Round-trip: parse the serialized graph back into a dataset dict.
    serialized = s.serialize_dataset(dataset)
    p.parse(serialized)
    datasets = list(p.datasets())
    assert len(datasets) == 1
    new_dataset = datasets[0]
    resources = new_dataset['resources']
    def _find_res(res_uri):
        # Return the parsed resource matching the given URI.
        for res in resources:
            if res_uri == res['uri']:
                return res
        raise ValueError("No resource for {}".format(res_uri))
    new_res_unknown = _find_res(str(r1))
    new_res_gpl = _find_res(str(r2))
    assert new_res_unknown['license_type'] == unknown.uri, (new_res_unknown['license_type'], unknown.uri,)
    assert new_res_gpl['license_type'] == dataset['resources'][1]['license_type']
def test_graph_from_dataset(self):
    """schema.org profile: a full dataset dict is mapped to the expected graph triples."""
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'notes': 'Lorem ipsum',
        'url': 'http://example.com/ds1',
        'version': '1.0b',
        'metadata_created': '2015-06-26T15:21:09.034694',
        'metadata_modified': '2015-06-26T15:21:09.075774',
        'license_title': 'CC-BY 3.0',
        'license_url': 'http://creativecommons.org/licenses/by/3.0/',
        'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
        'extras': [
            {'key': 'alternate_identifier', 'value': '[\"xyz\", \"abc\"]'},
            {'key': 'identifier', 'value': '26be5452-fc5c-11e7-8450-fea9aa178066'},
            {'key': 'version_notes', 'value': 'This is a beta version'},
            {'key': 'frequency', 'value': 'monthly'},
            {'key': 'language', 'value': '[\"en\"]'},
            {'key': 'theme', 'value': '[\"http://eurovoc.europa.eu/100142\", \"http://eurovoc.europa.eu/100152\"]'},
            {'key': 'conforms_to', 'value': '[\"Standard 1\", \"Standard 2\"]'},
            {'key': 'access_rights', 'value': 'public'},
            {'key': 'documentation', 'value': '[\"http://dataset.info.org/doc1\", \"http://dataset.info.org/doc2\"]'},
            {'key': 'provenance', 'value': 'Some statement about provenance'},
            {'key': 'dcat_type', 'value': 'test-type'},
            {'key': 'related_resource', 'value': '[\"http://dataset.info.org/related1\", \"http://dataset.info.org/related2\"]'},
            {'key': 'has_version', 'value': '[\"https://data.some.org/catalog/datasets/derived-dataset-1\", \"https://data.some.org/catalog/datasets/derived-dataset-2\"]'},
            {'key': 'is_version_of', 'value': '[\"https://data.some.org/catalog/datasets/original-dataset\"]'},
            {'key': 'source', 'value': '[\"https://data.some.org/catalog/datasets/source-dataset-1\", \"https://data.some.org/catalog/datasets/source-dataset-2\"]'},
            {'key': 'sample', 'value': '[\"https://data.some.org/catalog/datasets/9df8df51-63db-37a8-e044-0003ba9b0d98/sample\"]'},
        ]
    }
    extras = self._extras(dataset)

    serializer = RDFSerializer(profiles=['schemaorg'])
    graph = serializer.g

    dataset_ref = serializer.graph_from_dataset(dataset)

    eq_(unicode(dataset_ref), utils.dataset_uri(dataset))

    # Basic fields
    assert self._triple(graph, dataset_ref, RDF.type, SCHEMA.Dataset)
    assert self._triple(graph, dataset_ref, SCHEMA.name, dataset['title'])
    assert self._triple(graph, dataset_ref, SCHEMA.description, dataset['notes'])
    assert self._triple(graph, dataset_ref, SCHEMA.version, dataset['version'])
    assert self._triple(graph, dataset_ref, SCHEMA.license, dataset['license_url'])
    assert self._triple(graph, dataset_ref, SCHEMA.identifier, extras['identifier'])

    url = self._triple(graph, dataset_ref, SCHEMA.url, None)[2]
    assert url
    eq_(url, Literal('http://test.ckan.net/dataset/%s' % dataset['name']))

    # Dates
    assert self._triple(graph, dataset_ref, SCHEMA.datePublished,
                        dataset['metadata_created'])
    assert self._triple(graph, dataset_ref, SCHEMA.dateModified,
                        dataset['metadata_modified'])

    # Tags
    eq_(len(list(graph.triples((dataset_ref, SCHEMA.keywords, None)))), 2)
    for tag in dataset['tags']:
        assert self._triple(graph, dataset_ref, SCHEMA.keywords, tag['name'])

    # List-valued extras: one triple per JSON list element
    for key, predicate, node_type in [
        ('language', SCHEMA.inLanguage, Literal),
    ]:
        values = json.loads(extras[key])
        eq_(len(list(graph.triples((dataset_ref, predicate, None)))), len(values))
        for value in values:
            assert self._triple(graph, dataset_ref, predicate, node_type(value))