示例#1
0
    def _build_graph_and_check_format_mediatype(self, dataset_dict, expected_format, expected_mediatype):
        """
        Serialize the given dataset dict with both profiles into a fresh graph and compare the
        dct:format and dcat:mediaType objects of the first distribution with the expected lists.

        :param dataset_dict:
            dataset dict, expected to contain one resource
        :param expected_format:
            expected list of dct:format items in the resource
        :param expected_mediatype:
            expected list of dcat:mediaType items in the resource
        """
        self.graph = rdflib.Graph()
        dataset_ref = URIRef("http://example.org/datasets/1")

        # Run the base profile first, then the DCAT-AP.de profile, on the same graph.
        for profile_class in (EuropeanDCATAPProfile, DCATdeProfile):
            profile_class(self.graph, False).graph_from_dataset(dataset_dict, dataset_ref)

        # Only the first distribution node is inspected.
        distribution_refs = list(self.graph.objects(dataset_ref, self.DCAT.distribution))
        resource_ref = distribution_refs[0]

        # 'format' is accessed by subscript, not as an attribute of the namespace object.
        actual_format = list(self.graph.objects(resource_ref, self.DCT['format']))
        actual_mediatype = list(self.graph.objects(resource_ref, self.DCAT.mediaType))

        self.assertEqual(expected_format, actual_format)
        self.assertEqual(expected_mediatype, actual_mediatype)
示例#2
0
    def test_graph_from_dataset_only_dcatde_contact_point_values(self):
        """
        Check the contact point when the maintainer/author fields are absent from the dataset.

        The default dataset dict is stripped of 'maintainer', 'maintainer_email', 'author' and
        'author_email' before both profiles are run; the contact point is then asserted for
        vcard:fn and vcard:hasEmail.
        """
        # --- prepare ---
        self.graph = rdflib.Graph()
        dataset_dict = self._get_default_dataset_dict()
        # Drop the fields processed by the ckanext-dcat default profile.
        for default_profile_field in ('maintainer', 'maintainer_email', 'author', 'author_email'):
            dataset_dict.pop(default_profile_field)
        dataset_ref = URIRef("http://testuri/")

        # --- execute ---
        for profile_class in (EuropeanDCATAPProfile, DCATdeProfile):
            profile_class(self.graph, False).graph_from_dataset(dataset_dict, dataset_ref)

        # --- assert ---
        expected_vcard_predicates = [self.VCARD.fn, self.VCARD.hasEmail]
        self._assert_contact_point(dataset_ref, expected_vcard_predicates)
示例#3
0
    def test_graph_from_dataset(self):
        """
        Test the graph produced by the dcat and dcatde profiles.

        The fixture is built inline. Many of its values are spelled like prefixed predicate
        names (e.g. "dct:title"), presumably so that ``_check_simple_items`` can resolve them;
        values that should not be checked that way are set to "nocheck". Dates, themes,
        keywords, contacts, temporal, spatial, list-valued extras and the first distribution
        are asserted explicitly afterwards.
        """
        # NOTE(review): self.graph is used here without being created in this method;
        # presumably it is initialised elsewhere (e.g. in setUp) - confirm.

        def count_objects(subject, predicate):
            # Number of objects found in the graph for the given subject/predicate pair.
            return len(list(self.graph.objects(subject, predicate)))

        def assert_extras_list(subject, predicate, extras_list, extras_key):
            # Compare the graph objects with the list stored under extras_key.
            self._assert_list(subject, predicate,
                              self._get_value_from_extras(extras_list, extras_key))

        # GeoJSON polygon used for the "spatial" extra.
        spatial_geojson = (
            "{\"type\":\"Polygon\",\"coordinates\":[[[8.852920532226562," +
            "47.97245599240245],[9.133758544921875,47.97245599240245]," +
            "[9.133758544921875,48.17249666038475],[8.852920532226562," +
            "48.17249666038475],[8.852920532226562,47.97245599240245]]]}"
        )

        dataset_extras_fixture = self._transform_to_key_value({
            "contributorID": ["dcatde:contributorID"],
            "qualityProcessURI": "dcatde:qualityProcessURI",
            "documentation": "foaf:page",
            "frequency": "dct:accrualPeriodicity",
            "version_notes": "adms:versionNotes",
            "dcat_type": "dct:type",
            "author_url": "nocheck",
            "maintainer_url": "nocheck",
            "maintainer_tel": "nocheck",
            "maintainer_street": "nocheck",
            "maintainer_city": "nocheck",
            "maintainer_zip": "nocheck",
            "maintainer_country": "nocheck",
            "publisher_name": "nocheck",
            "publisher_email": "nocheck",
            "publisher_url": "nocheck",
            "originator_name": "nocheck",
            "originator_email": "nocheck",
            "originator_url": "nocheck",
            "contributor_name": "nocheck",
            "contributor_email": "nocheck",
            "contributor_url": "nocheck",
            "access_rights": "dct:accessRights",
            "provenance": "dct:provenance",
            "politicalGeocodingLevelURI": "dcatde:politicalGeocodingLevelURI",
            "politicalGeocodingURI": ["dcatde:politicalGeocodingURI"],
            "geocodingText": ["dcatde:geocodingText"],
            "legalbasisText": ["dcatde:legalbasisText"],
            "temporal_start": "2017-07-06T13:08:40",
            "temporal_end": "2017-07-06T13:08:41",
            "spatial": spatial_geojson,
            "language": ["dct:language"],
            "conforms_to": ["dct:conformsTo"],
            "alternate_identifier": ["adms:identifier"],
            "used_datasets": ["dct:relation", "bla"],
            "has_version": ["dct:hasVersion"],
            "is_version_of": ["dct:isVersionOf"],
        })

        resource_extras_fixture = self._transform_to_key_value({
            "issued": "dct:issued",
            "modified": "dct:modified",
            "documentation": "foaf:page",
            "download_url": "dcat:downloadURL",
            "plannedAvailability": "dcatde:plannedAvailability",
            "licenseAttributionByText": "dcatde:licenseAttributionByText",
            "license": "dct:license",
            "rights": "dct:rights",
            "status": "adms:status",
            "language": ["dct:language"],
            "conforms_to": ["dct:conformsTo"],
        })

        dataset_dict = {
            "id": "dct:identifier",
            "notes": "dct:description",
            "title": "dct:title",
            "url": "dcat:landingPage",
            "version": "owl:versionInfo",
            "metadata_created": "2017-07-06T13:08:40",
            "metadata_modified": "2017-07-06T13:08:41",
            "license_id": "nocheck",
            "groups": [{"name": "GRUPPEA"}, {"name": "GRUPPEB"}],
            "tags": [{"name": "dcat:keyword"}, {"name": "tagB"}],
            "author": "nocheck",
            "author_email": "nocheck",
            "maintainer": "nocheck",
            "maintainer_email": "nocheck",
            "extras": dataset_extras_fixture,
            "resources": [{
                "id": "id",
                "name": "dct:title",
                "description": "dct:description",
                "url": "dcat:accessURL",
                # NOTE(review): the key below ends with a colon ("format:"); this looks like
                # a typo for "format" - confirm before changing, the key is kept as-is.
                "format:": "dct:format",
                "mimetype": "dcat:mediaType",
                "size": 10,
                "hash": 24,
                "extras": resource_extras_fixture,
            }],
        }

        dataset_ref = URIRef("http://testuri/")

        # Run the base profile first, then the DCAT-AP.de profile, on the same graph.
        for profile_class in (EuropeanDCATAPProfile, DCATdeProfile):
            profile_class(self.graph, False).graph_from_dataset(dataset_dict, dataset_ref)

        # Basic values: top-level fields first, then every extras entry.
        extras = dataset_dict["extras"]

        for field_name in dataset_dict:
            self._check_simple_items(dataset_dict, dataset_ref, field_name)

        for extras_entry in extras:
            self._check_simple_items(dataset_dict, dataset_ref, extras_entry)

        # issued, modified
        self.assertEqual(count_objects(dataset_ref, self.DCT.issued), 1,
                         "dct:issued not found")
        self.assertEqual(count_objects(dataset_ref, self.DCT.modified), 1,
                         "dct:modified not found")

        # groups -> dcat:theme (with prefix), tags -> dcat:keyword
        expected_themes = [
            self.dcat_theme_prefix + group["name"] for group in dataset_dict["groups"]
        ]
        self._assert_list(dataset_ref, self.DCAT.theme, expected_themes)
        expected_keywords = [tag["name"] for tag in dataset_dict["tags"]]
        self._assert_list(dataset_ref, self.DCAT.keyword, expected_keywords)

        # originator, maintainer, contributor, creator, publisher
        self._assert_contact_info(dataset_ref, self.DCATDE.originator)
        self._assert_contact_info(dataset_ref, self.DCATDE.maintainer)
        self._assert_contact_info(dataset_ref, self.DCT.contributor)
        self._assert_contact_info(dataset_ref, self.DCT.creator)
        self._assert_contact_info(dataset_ref, self.DCT.publisher)

        # contactPoint: each vcard predicate must occur exactly once
        contact_point = next(
            self.graph.objects(dataset_ref, self.DCAT.contactPoint))
        expected_vcard_predicates = [
            self.VCARD.fn, self.VCARD.hasEmail, self.VCARD.hasURL,
            self.VCARD.hasTelephone, self.VCARD.hasStreetAddress,
            self.VCARD.hasLocality, self.VCARD.hasCountryName,
            self.VCARD.hasPostalCode
        ]
        for vcard_predicate in expected_vcard_predicates:
            self.assertEqual(
                count_objects(contact_point, vcard_predicate), 1,
                self.DCAT.contactPoint + str(vcard_predicate) + " not found")

        # temporal: exactly one start and one end date on the first temporal node
        temporal = list(self.graph.objects(dataset_ref, self.DCT.temporal))[0]
        self.assertEqual(count_objects(temporal, self.SCHEMA.startDate), 1,
                         self.SCHEMA.startDate + " not found")
        self.assertEqual(count_objects(temporal, self.SCHEMA.endDate), 1,
                         self.SCHEMA.endDate + " not found")

        # spatial: every node must carry either two geometries or one adminUnitL2
        for spatial in list(self.graph.objects(dataset_ref, self.DCT.spatial)):
            geometry_count = count_objects(spatial, self.LOCN.geometry)
            admin_unit_count = count_objects(spatial, self.LOCN.adminUnitL2)
            if geometry_count > 0:
                self.assertEqual(geometry_count, 2,
                                 self.LOCN.geometry + " not present, 2x")
                continue
            if admin_unit_count > 0:
                self.assertEqual(admin_unit_count, 1,
                                 self.LOCN.adminUnitL2 + " not present")
                continue
            self.fail("No valid spatial blocks found.")

        # list-valued extras of the dataset
        assert_extras_list(dataset_ref, self.DCT.language, extras, "language")
        assert_extras_list(dataset_ref, self.DCT.conformsTo, extras, "conforms_to")
        assert_extras_list(dataset_ref, self.ADMS.identifier, extras, "alternate_identifier")
        assert_extras_list(dataset_ref, self.DCT.relation, extras, "used_datasets")
        assert_extras_list(dataset_ref, self.DCT.hasVersion, extras, "has_version")
        assert_extras_list(dataset_ref, self.DCT.isVersionOf, extras, "is_version_of")
        assert_extras_list(dataset_ref, self.DCATDE.politicalGeocodingURI, extras,
                           "politicalGeocodingURI")
        assert_extras_list(dataset_ref, self.DCATDE.geocodingText, extras, "geocodingText")
        assert_extras_list(dataset_ref, self.DCATDE.legalbasisText, extras, "legalbasisText")
        assert_extras_list(dataset_ref, self.DCATDE.contributorID, extras, "contributorID")

        # resources: only the first resource / first distribution node is checked
        resource = dataset_dict["resources"][0]
        distribution_refs = list(
            self.graph.objects(dataset_ref, self.DCAT.distribution))
        resource_ref = distribution_refs[0]
        resource_extras = resource["extras"]

        for field_name in resource:
            self._check_simple_items(resource, resource_ref, field_name)

        for extras_entry in resource_extras:
            self._check_simple_items(resource, resource_ref, extras_entry)

        # size
        self.assertEqual(count_objects(resource_ref, self.DCAT.byteSize), 1,
                         self.DCAT.byteSize + " not found")

        # hash
        self.assertEqual(count_objects(resource_ref, self.SPDX.checksum), 1,
                         self.SPDX.checksum + " not found")

        # list-valued extras of the resource
        assert_extras_list(resource_ref, self.DCT.language, resource_extras, "language")
        assert_extras_list(resource_ref, self.DCT.conformsTo, resource_extras, "conforms_to")
示例#4
0
    def test_graph_from_dataset(self):
        """
        Test the graph produced by the dcat and dcatde profiles.

        The default dataset dict is serialized with both profiles into a fresh graph. Simple
        items are delegated to ``_check_simple_items``; dates, themes, keywords, contacts,
        temporal, spatial, list-valued extras and the first distribution are asserted
        explicitly.
        """

        def count_objects(subject, predicate):
            # Number of objects found in the graph for the given subject/predicate pair.
            return len(list(self.graph.objects(subject, predicate)))

        def assert_extras_list(subject, predicate, extras_list, extras_key):
            # Compare the graph objects with the list stored under extras_key.
            self._assert_list(subject, predicate,
                              self._get_value_from_extras(extras_list, extras_key))

        # --- prepare ---
        self.graph = rdflib.Graph()
        dataset_dict = self._get_default_dataset_dict()
        dataset_ref = URIRef("http://testuri/")

        # Run the base profile first, then the DCAT-AP.de profile, on the same graph.
        for profile_class in (EuropeanDCATAPProfile, DCATdeProfile):
            profile_class(self.graph, False).graph_from_dataset(dataset_dict, dataset_ref)

        # Basic values: top-level fields first, then every extras entry.
        extras = dataset_dict["extras"]

        for field_name in dataset_dict:
            self._check_simple_items(dataset_dict, dataset_ref, field_name)

        for extras_entry in extras:
            self._check_simple_items(dataset_dict, dataset_ref, extras_entry)

        # issued, modified
        self.assertEqual(count_objects(dataset_ref, self.DCT.issued), 1,
                         "dct:issued not found")
        self.assertEqual(count_objects(dataset_ref, self.DCT.modified), 1,
                         "dct:modified not found")

        # groups -> dcat:theme (with prefix), tags -> dcat:keyword
        expected_themes = [
            self.dcat_theme_prefix + group["name"] for group in dataset_dict["groups"]
        ]
        self._assert_list(dataset_ref, self.DCAT.theme, expected_themes)
        expected_keywords = [tag["name"] for tag in dataset_dict["tags"]]
        self._assert_list(dataset_ref, self.DCAT.keyword, expected_keywords)

        # originator, maintainer, contributor, creator, publisher
        self._assert_contact_info(dataset_ref, self.DCATDE.originator)
        self._assert_contact_info(dataset_ref, self.DCATDE.maintainer)
        self._assert_contact_info(dataset_ref, self.DCT.contributor)
        self._assert_contact_info(dataset_ref, self.DCT.creator)
        self._assert_contact_info(dataset_ref, self.DCT.publisher)

        # contactPoint
        self._assert_contact_point(dataset_ref)

        # temporal: exactly one start and one end date on the first temporal node
        temporal = list(self.graph.objects(dataset_ref, self.DCT.temporal))[0]
        self.assertEqual(count_objects(temporal, self.SCHEMA.startDate), 1,
                         self.SCHEMA.startDate + " not found")
        self.assertEqual(count_objects(temporal, self.SCHEMA.endDate), 1,
                         self.SCHEMA.endDate + " not found")

        # spatial: every node must carry either two geometries or one adminUnitL2
        for spatial in list(self.graph.objects(dataset_ref, self.DCT.spatial)):
            geometry_count = count_objects(spatial, self.LOCN.geometry)
            admin_unit_count = count_objects(spatial, self.LOCN.adminUnitL2)
            if geometry_count > 0:
                self.assertEqual(geometry_count, 2, self.LOCN.geometry + " not present, 2x")
                continue
            if admin_unit_count > 0:
                self.assertEqual(admin_unit_count, 1, self.LOCN.adminUnitL2 + " not present")
                continue
            self.fail("No valid spatial blocks found.")

        # list-valued extras of the dataset
        assert_extras_list(dataset_ref, self.DCT.language, extras, "language")
        assert_extras_list(dataset_ref, self.DCT.conformsTo, extras, "conforms_to")
        assert_extras_list(dataset_ref, self.ADMS.identifier, extras, "alternate_identifier")
        assert_extras_list(dataset_ref, self.DCT.relation, extras, "used_datasets")
        assert_extras_list(dataset_ref, self.DCT.hasVersion, extras, "has_version")
        assert_extras_list(dataset_ref, self.DCT.isVersionOf, extras, "is_version_of")
        assert_extras_list(dataset_ref, self.DCATDE.politicalGeocodingURI, extras,
                           "politicalGeocodingURI")
        # The extras keys below differ from the predicate names they are compared against.
        assert_extras_list(dataset_ref, self.DCATDE.geocodingDescription, extras,
                           "geocodingText")
        assert_extras_list(dataset_ref, self.DCATDE.legalBasis, extras, "legalbasisText")
        assert_extras_list(dataset_ref, self.DCATDE.contributorID, extras, "contributorID")

        # resources: only the first resource / first distribution node is checked
        resource = dataset_dict["resources"][0]
        distribution_refs = list(self.graph.objects(dataset_ref, self.DCAT.distribution))
        resource_ref = distribution_refs[0]
        resource_extras = resource["extras"]

        for field_name in resource:
            self._check_simple_items(resource, resource_ref, field_name)

        for extras_entry in resource_extras:
            self._check_simple_items(resource, resource_ref, extras_entry)

        # size
        self.assertEqual(count_objects(resource_ref, self.DCAT.byteSize), 1,
                         self.DCAT.byteSize + " not found")

        # hash
        self.assertEqual(count_objects(resource_ref, self.SPDX.checksum), 1,
                         self.SPDX.checksum + " not found")

        # list-valued extras of the resource
        assert_extras_list(resource_ref, self.DCT.language, resource_extras, "language")
        assert_extras_list(resource_ref, self.DCT.conformsTo, resource_extras, "conforms_to")