def _build_graph_and_check_format_mediatype(self, dataset_dict, expected_format, expected_mediatype):
    """
    Build a fresh graph from the given dataset dict and compare the
    dct:format and dcat:mediaType objects of its first distribution with
    the expected values.

    The new graph is stored on ``self.graph``. EuropeanDCATAPProfile is
    applied first, DCATdeProfile second, both on the same graph and for
    the same dataset reference.

    :param dataset_dict: dataset dict, expected to contain one resource
    :param expected_format: expected list of dct:format objects of the
        first distribution
    :param expected_mediatype: expected list of dcat:mediaType objects of
        the first distribution
    """
    graph = rdflib.Graph()
    self.graph = graph
    dataset_uri = URIRef("http://example.org/datasets/1")

    # Base profile first, then the DCAT-AP.de profile on top of it.
    for profile_class in (EuropeanDCATAPProfile, DCATdeProfile):
        profile = profile_class(graph, False)
        profile.graph_from_dataset(dataset_dict, dataset_uri)

    # Only the first distribution is inspected; an IndexError here means
    # that no distribution was written at all.
    distributions = list(graph.objects(dataset_uri, self.DCAT.distribution))
    first_distribution = distributions[0]

    actual_format = list(graph.objects(first_distribution, self.DCT['format']))
    actual_mediatype = list(graph.objects(first_distribution, self.DCAT.mediaType))

    self.assertEqual(expected_format, actual_format)
    self.assertEqual(expected_mediatype, actual_mediatype)
def test_graph_from_dataset_only_dcatde_contact_point_values(self):
    """
    Check the contact point when the maintainer/author name and email
    fields are missing from the dataset dict.

    The default dataset dict is stripped of ``maintainer``,
    ``maintainer_email``, ``author`` and ``author_email`` before both
    profiles are applied. The contact point is then checked through
    ``_assert_contact_point`` with vcard:fn and vcard:hasEmail.
    """
    ### prepare ###
    self.graph = rdflib.Graph()
    dataset_dict = self._get_default_dataset_dict()
    # remove fields processed by ckanext-dcat default profile
    for field_name in ('maintainer', 'maintainer_email', 'author', 'author_email'):
        dataset_dict.pop(field_name)
    dataset_ref = URIRef("http://testuri/")

    ### execute ###
    european_profile = EuropeanDCATAPProfile(self.graph, False)
    european_profile.graph_from_dataset(dataset_dict, dataset_ref)
    german_profile = DCATdeProfile(self.graph, False)
    german_profile.graph_from_dataset(dataset_dict, dataset_ref)

    ### assert ###
    # contactPoint
    # NOTE(review): the second argument is presumably the list of vcard
    # properties skipped or expected by the helper - confirm against
    # _assert_contact_point.
    vcard_properties = [self.VCARD.fn, self.VCARD.hasEmail]
    self._assert_contact_point(dataset_ref, vcard_properties)
def test_graph_from_dataset(self):
    """
    Test dcat and dcatde profiles.

    An inline dataset dict is serialized with EuropeanDCATAPProfile and
    then DCATdeProfile into a fresh graph. Afterwards the graph is checked
    for the simple dataset/resource values, dates, themes, keywords,
    contact information, contact point, temporal and spatial nodes and the
    list-valued extras.

    Most fixture values are written as prefixed predicate names (for
    example ``"dct:title"``); they are handed to ``_check_simple_items``,
    which presumably looks the predicate up in the graph - confirm against
    that helper.

    NOTE(review): another method with this name is defined later in the
    visible source. If both live in the same class the later definition
    shadows this one - confirm and rename/remove as appropriate.
    """
    ### prepare ###
    # Start from an empty graph, as the sibling tests do, so the result
    # does not depend on graph state left behind by other code.
    self.graph = rdflib.Graph()

    dataset_dict = {
        "id": "dct:identifier",
        "notes": "dct:description",
        "title": "dct:title",
        "url": "dcat:landingPage",
        "version": "owl:versionInfo",
        "metadata_created": "2017-07-06T13:08:40",
        "metadata_modified": "2017-07-06T13:08:41",
        "license_id": "nocheck",
        "groups": [{"name": "GRUPPEA"}, {"name": "GRUPPEB"}],
        "tags": [{"name": "dcat:keyword"}, {"name": "tagB"}],
        "author": "nocheck",
        "author_email": "nocheck",
        "maintainer": "nocheck",
        "maintainer_email": "nocheck",
        "extras": self._transform_to_key_value({
            "contributorID": ["dcatde:contributorID"],
            "qualityProcessURI": "dcatde:qualityProcessURI",
            "documentation": "foaf:page",
            "frequency": "dct:accrualPeriodicity",
            "version_notes": "adms:versionNotes",
            "dcat_type": "dct:type",
            "author_url": "nocheck",
            "maintainer_url": "nocheck",
            "maintainer_tel": "nocheck",
            "maintainer_street": "nocheck",
            "maintainer_city": "nocheck",
            "maintainer_zip": "nocheck",
            "maintainer_country": "nocheck",
            "publisher_name": "nocheck",
            "publisher_email": "nocheck",
            "publisher_url": "nocheck",
            "originator_name": "nocheck",
            "originator_email": "nocheck",
            "originator_url": "nocheck",
            "contributor_name": "nocheck",
            "contributor_email": "nocheck",
            "contributor_url": "nocheck",
            "access_rights": "dct:accessRights",
            "provenance": "dct:provenance",
            "politicalGeocodingLevelURI": "dcatde:politicalGeocodingLevelURI",
            "politicalGeocodingURI": ["dcatde:politicalGeocodingURI"],
            "geocodingText": ["dcatde:geocodingText"],
            "legalbasisText": ["dcatde:legalbasisText"],
            "temporal_start": "2017-07-06T13:08:40",
            "temporal_end": "2017-07-06T13:08:41",
            "spatial": "{\"type\":\"Polygon\",\"coordinates\":[[[8.852920532226562," +
                       "47.97245599240245],[9.133758544921875,47.97245599240245]," +
                       "[9.133758544921875,48.17249666038475],[8.852920532226562," +
                       "48.17249666038475],[8.852920532226562,47.97245599240245]]]}",
            "language": ["dct:language"],
            "conforms_to": ["dct:conformsTo"],
            "alternate_identifier": ["adms:identifier"],
            "used_datasets": ["dct:relation", "bla"],
            "has_version": ["dct:hasVersion"],
            "is_version_of": ["dct:isVersionOf"]
        }),
        "resources": [{
            "id": "id",
            "name": "dct:title",
            "description": "dct:description",
            "url": "dcat:accessURL",
            # The key was "format:" (stray colon), which left the resource
            # without a format field.
            "format": "dct:format",
            "mimetype": "dcat:mediaType",
            "size": 10,
            "hash": 24,
            "extras": self._transform_to_key_value({
                "issued": "dct:issued",
                "modified": "dct:modified",
                "documentation": "foaf:page",
                "download_url": "dcat:downloadURL",
                "plannedAvailability": "dcatde:plannedAvailability",
                "licenseAttributionByText": "dcatde:licenseAttributionByText",
                "license": "dct:license",
                "rights": "dct:rights",
                "status": "adms:status",
                "language": ["dct:language"],
                "conforms_to": ["dct:conformsTo"],
            })
        }]
    }
    dataset_ref = URIRef("http://testuri/")

    ### execute ###
    dcat = EuropeanDCATAPProfile(self.graph, False)
    dcat.graph_from_dataset(dataset_dict, dataset_ref)
    dcatde = DCATdeProfile(self.graph, False)
    dcatde.graph_from_dataset(dataset_dict, dataset_ref)

    ### assert ###
    # Assert structure of graph - basic values
    extras = dataset_dict["extras"]
    for key in dataset_dict:
        self._check_simple_items(dataset_dict, dataset_ref, key)
    for key in extras:
        self._check_simple_items(dataset_dict, dataset_ref, key)

    # issued, modified
    self.assertEqual(
        len(list(self.graph.objects(dataset_ref, self.DCT.issued))), 1,
        "dct:issued not found")
    self.assertEqual(
        len(list(self.graph.objects(dataset_ref, self.DCT.modified))), 1,
        "dct:modified not found")

    # groups, tags
    self._assert_list(
        dataset_ref, self.DCAT.theme,
        [self.dcat_theme_prefix + x["name"] for x in dataset_dict["groups"]])
    self._assert_list(
        dataset_ref, self.DCAT.keyword,
        [x["name"] for x in dataset_dict["tags"]])

    # author, maintainer, originator, contributor, publisher
    self._assert_contact_info(dataset_ref, self.DCATDE.originator)
    self._assert_contact_info(dataset_ref, self.DCATDE.maintainer)
    self._assert_contact_info(dataset_ref, self.DCT.contributor)
    self._assert_contact_info(dataset_ref, self.DCT.creator)
    self._assert_contact_info(dataset_ref, self.DCT.publisher)

    # contactPoint: every vcard attribute must occur exactly once
    contact_point = next(
        self.graph.objects(dataset_ref, self.DCAT.contactPoint))
    vcard_attrs = [
        self.VCARD.fn, self.VCARD.hasEmail, self.VCARD.hasURL,
        self.VCARD.hasTelephone, self.VCARD.hasStreetAddress,
        self.VCARD.hasLocality, self.VCARD.hasCountryName,
        self.VCARD.hasPostalCode
    ]
    for v_attr in vcard_attrs:
        self.assertEqual(
            len(list(self.graph.objects(contact_point, v_attr))), 1,
            self.DCAT.contactPoint + str(v_attr) + " not found")

    # temporal
    temporal = list(self.graph.objects(dataset_ref, self.DCT.temporal))[0]
    self.assertEqual(
        len(list(self.graph.objects(temporal, self.SCHEMA.startDate))), 1,
        self.SCHEMA.startDate + " not found")
    self.assertEqual(
        len(list(self.graph.objects(temporal, self.SCHEMA.endDate))), 1,
        self.SCHEMA.endDate + " not found")

    # spatial: each node must be either a geometry block (two geometry
    # objects) or an admin unit block (one adminUnitL2 object)
    for spatial in list(self.graph.objects(dataset_ref, self.DCT.spatial)):
        geonodes = len(
            list(self.graph.objects(spatial, self.LOCN.geometry)))
        adminnodes = len(
            list(self.graph.objects(spatial, self.LOCN.adminUnitL2)))
        if geonodes > 0:
            self.assertEqual(geonodes, 2,
                             self.LOCN.geometry + " not present, 2x")
        elif adminnodes > 0:
            self.assertEqual(adminnodes, 1,
                             self.LOCN.adminUnitL2 + " not present")
        else:
            self.fail("No valid spatial blocks found.")

    # lists in extras
    self._assert_list(dataset_ref, self.DCT.language,
                      self._get_value_from_extras(extras, "language"))
    self._assert_list(dataset_ref, self.DCT.conformsTo,
                      self._get_value_from_extras(extras, "conforms_to"))
    self._assert_list(
        dataset_ref, self.ADMS.identifier,
        self._get_value_from_extras(extras, "alternate_identifier"))
    self._assert_list(dataset_ref, self.DCT.relation,
                      self._get_value_from_extras(extras, "used_datasets"))
    self._assert_list(dataset_ref, self.DCT.hasVersion,
                      self._get_value_from_extras(extras, "has_version"))
    self._assert_list(dataset_ref, self.DCT.isVersionOf,
                      self._get_value_from_extras(extras, "is_version_of"))
    self._assert_list(
        dataset_ref, self.DCATDE.politicalGeocodingURI,
        self._get_value_from_extras(extras, "politicalGeocodingURI"))
    self._assert_list(dataset_ref, self.DCATDE.geocodingText,
                      self._get_value_from_extras(extras, "geocodingText"))
    self._assert_list(
        dataset_ref, self.DCATDE.legalbasisText,
        self._get_value_from_extras(extras, "legalbasisText"))
    self._assert_list(dataset_ref, self.DCATDE.contributorID,
                      self._get_value_from_extras(extras, "contributorID"))

    # resources: only the first resource / distribution is inspected
    resource = dataset_dict["resources"][0]
    resource_ref = list(
        self.graph.objects(dataset_ref, self.DCAT.distribution))[0]
    resource_extras = resource["extras"]
    for key in resource:
        self._check_simple_items(resource, resource_ref, key)
    for key in resource_extras:
        self._check_simple_items(resource, resource_ref, key)

    # size
    self.assertEqual(
        len(list(self.graph.objects(resource_ref, self.DCAT.byteSize))), 1,
        self.DCAT.byteSize + " not found")
    # hash
    self.assertEqual(
        len(list(self.graph.objects(resource_ref, self.SPDX.checksum))), 1,
        self.SPDX.checksum + " not found")

    # lists
    self._assert_list(
        resource_ref, self.DCT.language,
        self._get_value_from_extras(resource_extras, "language"))
    self._assert_list(
        resource_ref, self.DCT.conformsTo,
        self._get_value_from_extras(resource_extras, "conforms_to"))
def test_graph_from_dataset(self):
    """
    Test dcat and dcatde profiles.

    The default dataset dict is serialized with EuropeanDCATAPProfile and
    then DCATdeProfile into a fresh graph. The graph is then checked for
    the simple dataset/resource values, dates, themes, keywords, contact
    information, contact point, temporal and spatial nodes and the
    list-valued extras.
    """
    ### prepare ###
    self.graph = rdflib.Graph()
    dataset_dict = self._get_default_dataset_dict()
    dataset_ref = URIRef("http://testuri/")

    def object_count(subject, predicate):
        # Number of objects stored in the graph for (subject, predicate).
        return len(list(self.graph.objects(subject, predicate)))

    ### execute ###
    european_profile = EuropeanDCATAPProfile(self.graph, False)
    european_profile.graph_from_dataset(dataset_dict, dataset_ref)
    german_profile = DCATdeProfile(self.graph, False)
    german_profile.graph_from_dataset(dataset_dict, dataset_ref)

    ### assert ###
    # Assert structure of graph - basic values
    dataset_extras = dataset_dict["extras"]
    for field in dataset_dict:
        self._check_simple_items(dataset_dict, dataset_ref, field)
    for extra in dataset_extras:
        self._check_simple_items(dataset_dict, dataset_ref, extra)

    # issued, modified
    self.assertEqual(object_count(dataset_ref, self.DCT.issued), 1,
                     "dct:issued not found")
    self.assertEqual(object_count(dataset_ref, self.DCT.modified), 1,
                     "dct:modified not found")

    # groups, tags
    expected_themes = [
        self.dcat_theme_prefix + group["name"]
        for group in dataset_dict["groups"]
    ]
    self._assert_list(dataset_ref, self.DCAT.theme, expected_themes)
    expected_keywords = [tag["name"] for tag in dataset_dict["tags"]]
    self._assert_list(dataset_ref, self.DCAT.keyword, expected_keywords)

    # author, maintainer, originator, contributor, publisher
    contact_roles = (
        self.DCATDE.originator,
        self.DCATDE.maintainer,
        self.DCT.contributor,
        self.DCT.creator,
        self.DCT.publisher,
    )
    for role in contact_roles:
        self._assert_contact_info(dataset_ref, role)

    # contactPoint
    self._assert_contact_point(dataset_ref)

    # temporal: start and end date must each occur exactly once
    temporal_node = list(self.graph.objects(dataset_ref, self.DCT.temporal))[0]
    for date_predicate in (self.SCHEMA.startDate, self.SCHEMA.endDate):
        self.assertEqual(object_count(temporal_node, date_predicate), 1,
                         date_predicate + " not found")

    # spatial: each node must be either a geometry block (two geometry
    # objects) or an admin unit block (one adminUnitL2 object)
    for spatial_node in list(self.graph.objects(dataset_ref, self.DCT.spatial)):
        geometry_count = object_count(spatial_node, self.LOCN.geometry)
        admin_unit_count = object_count(spatial_node, self.LOCN.adminUnitL2)
        if geometry_count > 0:
            self.assertEqual(geometry_count, 2,
                             self.LOCN.geometry + " not present, 2x")
            continue
        if admin_unit_count > 0:
            self.assertEqual(admin_unit_count, 1,
                             self.LOCN.adminUnitL2 + " not present")
            continue
        self.fail("No valid spatial blocks found.")

    # lists in extras: (predicate, key of the extras entry)
    dataset_list_checks = (
        (self.DCT.language, "language"),
        (self.DCT.conformsTo, "conforms_to"),
        (self.ADMS.identifier, "alternate_identifier"),
        (self.DCT.relation, "used_datasets"),
        (self.DCT.hasVersion, "has_version"),
        (self.DCT.isVersionOf, "is_version_of"),
        (self.DCATDE.politicalGeocodingURI, "politicalGeocodingURI"),
        (self.DCATDE.geocodingDescription, "geocodingText"),
        (self.DCATDE.legalBasis, "legalbasisText"),
        (self.DCATDE.contributorID, "contributorID"),
    )
    for predicate, extras_key in dataset_list_checks:
        expected_values = self._get_value_from_extras(dataset_extras, extras_key)
        self._assert_list(dataset_ref, predicate, expected_values)

    # resources: only the first resource / distribution is inspected
    resource = dataset_dict["resources"][0]
    resource_ref = list(self.graph.objects(dataset_ref, self.DCAT.distribution))[0]
    resource_extras = resource["extras"]
    for field in resource:
        self._check_simple_items(resource, resource_ref, field)
    for extra in resource_extras:
        self._check_simple_items(resource, resource_ref, extra)

    # size
    self.assertEqual(object_count(resource_ref, self.DCAT.byteSize), 1,
                     self.DCAT.byteSize + " not found")
    # hash
    self.assertEqual(object_count(resource_ref, self.SPDX.checksum), 1,
                     self.SPDX.checksum + " not found")

    # lists
    resource_list_checks = (
        (self.DCT.language, "language"),
        (self.DCT.conformsTo, "conforms_to"),
    )
    for predicate, extras_key in resource_list_checks:
        expected_values = self._get_value_from_extras(resource_extras, extras_key)
        self._assert_list(resource_ref, predicate, expected_values)