def dataset_uri(dataset_dict):
    '''
    Build the URI that identifies a dataset in the RDF serializations

    Candidates are tried in this order and the first non-empty one wins:

        1. The value of the `uri` field
        2. The value of the first extra with key `uri` whose value is not
           the literal string 'None'
        3. `catalog_uri()` + '/catalogo/' + `id` field
        4. `catalog_uri()` + '/catalogo/' + a random UUID4 (a warning is
           logged when this fallback is used)

    Returns a string with the dataset URI.
    '''
    uri = dataset_dict.get('uri')
    if uri:
        return uri

    # Look for the URI among the extras; stop at the first usable entry
    for extra in dataset_dict.get('extras', []):
        if extra['key'] == 'uri' and extra['value'] != 'None':
            uri = extra['value']
            break
    if uri:
        return uri

    dataset_id = dataset_dict.get('id')
    if dataset_id:
        return '{0}/catalogo/{1}'.format(catalog_uri().rstrip('/'),
                                         dataset_id)

    # Nothing identifies the dataset: mint a random identifier
    random_uri = '{0}/catalogo/{1}'.format(catalog_uri().rstrip('/'),
                                           str(uuid.uuid4()))
    log.warning('Using a random id for dataset URI')
    return random_uri
def update_config(self, config):
    '''
    Register the extension templates and validate the DCAT settings

    Adds the `templates` directory to the CKAN template path, calls
    `utils.catalog_uri()` once so that any warning it emits shows up at
    startup, and validates the custom catalog endpoint (the value stored
    under `CUSTOM_ENDPOINT_CONFIG`), if one is configured.

    Raises an `Exception` if the custom endpoint does not start with a
    slash or does not contain the `{_format}` placeholder.
    '''
    p.toolkit.add_template_directory(config, 'templates')

    # Check catalog URI on startup to emit a warning if necessary
    utils.catalog_uri()

    # Check custom catalog endpoint
    custom_endpoint = config.get(CUSTOM_ENDPOINT_CONFIG)
    if not custom_endpoint:
        return

    if not custom_endpoint.startswith('/'):
        # '/' is a (forward) slash; the message used to say "backslash"
        raise Exception(
            '"{0}" should start with a slash (/)'.format(
                CUSTOM_ENDPOINT_CONFIG))

    if '{_format}' not in custom_endpoint:
        raise Exception(
            '"{0}" should contain {{_format}}'.format(
                CUSTOM_ENDPOINT_CONFIG))
def _dge_harvest_dataset_uri(self, dataset_dict):
    '''
    Returns an URI for the dataset

    This will be used to uniquely reference the dataset on the RDF
    serializations.

    The value will be the first found of:

        1. `catalog_uri()` + '/catalogo/' + `name` field
        2. Whatever `dataset_uri(dataset_dict)` returns

    Check the documentation for `catalog_uri()` for the recommended ways of
    setting it.

    Returns a string with the dataset URI.
    '''
    name = dataset_dict.get('name')
    if name:
        return '{0}/catalogo/{1}'.format(catalog_uri().rstrip('/'), name)

    # Without a name fall back to the generic lookup. Previously the local
    # `uri` was never bound on this path, so the fallback check raised
    # UnboundLocalError instead of being reached.
    return dataset_uri(dataset_dict)
def graph_from_catalog(self, catalog_dict, catalog_ref):
    '''
    Extend the catalog node with the DCAT-AP_IT specific triples

    Working on the profile graph (`self.g`) this binds the `it_namespaces`
    prefixes, types the catalog as `DCATAPIT.Catalog`, replaces the
    homepage with `catalog_uri()` + '/#', and adds a publisher agent, the
    issued date, the theme taxonomy and the offered languages, all read
    from the CKAN `config`.

    `catalog_dict` is not read by this method. Nothing is returned.
    '''
    graph = self.g

    for prefix, namespace in it_namespaces.iteritems():
        graph.bind(prefix, namespace)

    ### Add a further type for the Catalog node
    graph.add((catalog_ref, RDF.type, DCATAPIT.Catalog))

    ### Replace homepage
    # Try to avoid having the Catalog URIRef identical to the homepage URI
    site_homepage = URIRef(config.get('ckan.site_url'))
    graph.remove((catalog_ref, FOAF.homepage, site_homepage))
    graph.add((catalog_ref, FOAF.homepage, URIRef(catalog_uri() + '/#')))

    ### publisher
    publisher_name = config.get('ckanext.dcatapit_configpublisher_name',
                                'unknown')
    publisher_id = config.get(
        'ckanext.dcatapit_configpublisher_code_identifier', 'unknown')

    publisher = BNode()
    publisher_triples = (
        (publisher, RDF['type'], DCATAPIT.Agent),
        (publisher, RDF['type'], FOAF.Agent),
        (catalog_ref, DCT.publisher, publisher),
        (publisher, FOAF.name, Literal(publisher_name)),
        (publisher, DCT.identifier, Literal(publisher_id)),
    )
    for triple in publisher_triples:
        graph.add(triple)

    ### issued date
    issued = config.get('ckanext.dcatapit_config.catalog_issued',
                        '1900-01-01')
    if issued:
        self._add_date_triple(catalog_ref, DCT.issued, issued)

    ### theme taxonomy
    # <dcat:themeTaxonomy rdf:resource="http://publications.europa.eu/resource/authority/data-theme"/>
    # <skos:ConceptScheme rdf:about="http://publications.europa.eu/resource/authority/data-theme">
    #     <dct:title xml:lang="it">Il Vocabolario Data Theme</dct:title>
    # </skos:ConceptScheme>
    taxonomy = URIRef(THEME_BASE_URI.rstrip('/'))
    graph.add((catalog_ref, DCAT.themeTaxonomy, taxonomy))
    graph.add((taxonomy, RDF.type, SKOS.ConceptScheme))
    graph.add((taxonomy, DCT.title,
               Literal('Il Vocabolario Data Theme', lang='it')))

    ### language
    offered = config.get('ckan.locales_offered', 'it').split()
    for ckan_lang in offered:
        lang_code = lang_mapping_ckan_to_voc.get(ckan_lang)
        if not lang_code:
            # No vocabulary entry for this locale
            continue
        graph.add((catalog_ref, DCT.language,
                   URIRef(LANG_BASE_URI + lang_code)))

    # Drop the plain-literal language for the default locale
    default_lang = Literal(config.get(DEFAULT_LANG))
    graph.remove((catalog_ref, DCT.language, default_lang))
def resource_uri(resource_dict):
    '''
    Build the URI that identifies a resource in the RDF serializations

    The value will be the first found of:

        1. The value of the `uri` field, unless it is empty or the literal
           string 'None'
        2. `catalog_uri()` + '/catalog/' + the dataset id (as returned by
           `dataset_id_from_resource()`) + '/resource/' + `id` field

    Returns a string with the resource URI.
    '''
    stored_uri = resource_dict.get('uri')
    if stored_uri and stored_uri != 'None':
        return stored_uri

    # No usable stored URI: derive one from the catalog and the ids
    dataset_id = dataset_id_from_resource(resource_dict)
    return '{0}/catalog/{1}/resource/{2}'.format(
        catalog_uri().rstrip('/'), dataset_id, resource_dict['id'])
def update_config(self, config):
    '''
    Register the extension templates and validate the DCAT settings

    Adds the `templates` directory to the CKAN template path, calls
    `catalog_uri()` once so that any warning it emits shows up at startup,
    and validates the custom catalog endpoint (the value stored under
    `CUSTOM_ENDPOINT_CONFIG`), if one is configured.

    Raises an `Exception` if the custom endpoint does not start with a
    slash or does not contain the `{_format}` placeholder.
    '''
    p.toolkit.add_template_directory(config, 'templates')

    # Check catalog URI on startup to emit a warning if necessary
    catalog_uri()

    # Check custom catalog endpoint
    custom_endpoint = config.get(CUSTOM_ENDPOINT_CONFIG)
    if not custom_endpoint:
        return

    if not custom_endpoint.startswith('/'):
        # '/' is a (forward) slash; the message used to say "backslash"
        raise Exception(
            '"{0}" should start with a slash (/)'.format(
                CUSTOM_ENDPOINT_CONFIG))

    if '{_format}' not in custom_endpoint:
        raise Exception(
            '"{0}" should contain {{_format}}'.format(
                CUSTOM_ENDPOINT_CONFIG))
def test_graph_from_catalog_modified_date(self):
    '''
    The catalog graph carries a DCT.modified triple whose value is the
    `metadata_modified` of the dataset created for the test, typed as
    XSD.dateTime.
    '''
    dataset = factories.Dataset()

    serializer = RDFSerializer()
    graph = serializer.g

    catalog = serializer.graph_from_catalog()

    eq_(unicode(catalog), utils.catalog_uri())

    modified = dataset['metadata_modified']
    assert self._triple(graph, catalog, DCT.modified, modified,
                        XSD.dateTime)
def object_uri(record_dict):
    """
    Build the URI that identifies an object record in the RDF
    serializations

    The value is `catalog_uri()` (without trailing slashes) + '/object/' +
    the `uuid` field of the record.

    Returns a string with the object URI.
    """
    base = catalog_uri().rstrip('/')
    return '{0}/object/{1}'.format(base, record_dict.get('uuid'))
def test_graph_from_catalog(self):
    '''
    A catalog serialized without a catalog dict is identified by
    `utils.catalog_uri()` and exposes the basic fields taken from the
    CKAN config.
    '''
    serializer = RDFSerializer()
    graph = serializer.g

    catalog = serializer.graph_from_catalog()

    eq_(unicode(catalog), utils.catalog_uri())

    # Basic fields
    expected = (
        (RDF.type, DCAT.Catalog),
        (DCT.title, config.get('ckan.site_title')),
        (FOAF.homepage, config.get('ckan.site_url')),
        (DCT.language, 'en'),
    )
    for predicate, value in expected:
        assert self._triple(graph, catalog, predicate, value)
def test_graph_from_catalog(self):
    '''
    A catalog serialized without a catalog dict is identified by
    `utils.catalog_uri()` and exposes the basic fields taken from the
    CKAN config, with the homepage stored as a URIRef.
    '''
    serializer = RDFSerializer()
    graph = serializer.g

    catalog = serializer.graph_from_catalog()

    eq_(unicode(catalog), utils.catalog_uri())

    def has_triple(predicate, value):
        # Shorthand for a lookup on the catalog node
        return self._triple(graph, catalog, predicate, value)

    # Basic fields
    assert has_triple(RDF.type, DCAT.Catalog)
    assert has_triple(DCT.title, config.get('ckan.site_title'))
    assert has_triple(FOAF.homepage, URIRef(config.get('ckan.site_url')))
    assert has_triple(DCT.language, 'en')
def graph_from_catalog(self, catalog_dict=None):
    '''
    Build the graph for the catalog (CKAN site) with the loaded profiles

    Every class in `self._profiles` is instantiated with the serializer
    graph (`self.g`) and the compatibility mode flag, and its
    `graph_from_catalog()` is called with `catalog_dict` and the catalog
    reference.

    Returns the reference to the catalog, an rdflib URIRef built from
    `catalog_uri()`.
    '''
    catalog_ref = URIRef(catalog_uri())

    for profile_class in self._profiles:
        profile_class(self.g, self.compatibility_mode).graph_from_catalog(
            catalog_dict, catalog_ref)

    return catalog_ref
def _page_url(page):
    '''
    Build the URL of the given page of the current request

    The URL is `catalog_uri()` followed by the request path. Of the
    request parameters only `modified_since`, `profiles`, `q` and `fq`
    are carried over, followed by `page`.
    '''
    carried_over = ('modified_since', 'profiles', 'q', 'fq')

    base_url = '%s%s' % (catalog_uri(), toolkit.request.path)

    pairs = ['{0}={1}'.format(item[0], item[1])
             for item in toolkit.request.params.items()
             if item[0] in carried_over]

    if not pairs:
        return '{0}?page={1}'.format(base_url, page)

    return '{0}?{1}&page={2}'.format(base_url, '&'.join(pairs), page)
def organization_uri(orga_dict):
    '''
    Build the URI that identifies an organization in the RDF
    serializations

    The value is `catalog_uri()` (without trailing slashes) +
    '/organization/' + the `id` field of the organization dict.

    Returns a string with the organization URI.
    '''
    base = catalog_uri().rstrip('/')
    orga_id = orga_dict.get('id', None)
    return '{0}/organization/{1}'.format(base, orga_id)
def test_graph_from_catalog_dict(self):
    '''
    A catalog serialized from an explicit catalog dict exposes the dict
    values, with the homepage stored as a URIRef.
    '''
    catalog_dict = dict(
        title="My Catalog",
        description="An Open Data Catalog",
        homepage="http://example.com",
        language="de",
    )

    serializer = RDFSerializer()
    graph = serializer.g

    catalog = serializer.graph_from_catalog(catalog_dict)

    eq_(unicode(catalog), utils.catalog_uri())

    # Basic fields
    expected = (
        (RDF.type, DCAT.Catalog),
        (DCT.title, catalog_dict["title"]),
        (DCT.description, catalog_dict["description"]),
        (FOAF.homepage, URIRef(catalog_dict["homepage"])),
        (DCT.language, catalog_dict["language"]),
    )
    for predicate, value in expected:
        assert self._triple(graph, catalog, predicate, value)
def test_graph_from_catalog_dict(self):
    '''
    A catalog serialized from an explicit catalog dict exposes the dict
    values as they were given.
    '''
    catalog_dict = dict(
        title='My Catalog',
        description='An Open Data Catalog',
        homepage='http://example.com',
        language='de',
    )

    serializer = RDFSerializer()
    graph = serializer.g

    catalog = serializer.graph_from_catalog(catalog_dict)

    eq_(unicode(catalog), utils.catalog_uri())

    # Basic fields
    assert self._triple(graph, catalog, RDF.type, DCAT.Catalog)

    fields = (
        (DCT.title, 'title'),
        (DCT.description, 'description'),
        (FOAF.homepage, 'homepage'),
        (DCT.language, 'language'),
    )
    for predicate, key in fields:
        assert self._triple(graph, catalog, predicate, catalog_dict[key])
def test_graph_from_catalog_dict(self):
    '''
    A catalog serialized from an explicit catalog dict exposes the dict
    values, with the homepage stored as a URIRef.
    '''
    title = 'My Catalog'
    description = 'An Open Data Catalog'
    homepage = 'http://example.com'
    language = 'de'
    catalog_dict = {
        'title': title,
        'description': description,
        'homepage': homepage,
        'language': language,
    }

    serializer = RDFSerializer()
    graph = serializer.g

    catalog = serializer.graph_from_catalog(catalog_dict)

    eq_(unicode(catalog), utils.catalog_uri())

    def has_triple(predicate, value):
        # Shorthand for a lookup on the catalog node
        return self._triple(graph, catalog, predicate, value)

    # Basic fields
    assert has_triple(RDF.type, DCAT.Catalog)
    assert has_triple(DCT.title, title)
    assert has_triple(DCT.description, description)
    assert has_triple(FOAF.homepage, URIRef(homepage))
    assert has_triple(DCT.language, language)
def test_graph_from_catalog_dict_language_uri_ref(self):
    '''
    When the catalog dict language is a URI, the DCT.language triple holds
    it as a URIRef.
    '''
    language_uri = \
        'http://publications.europa.eu/resource/authority/language/ITA'
    catalog_dict = dict(
        title='My Catalog',
        description='An Open Data Catalog',
        homepage='http://example.com',
        language=language_uri,
    )

    serializer = RDFSerializer()
    graph = serializer.g

    catalog = serializer.graph_from_catalog(catalog_dict)

    eq_(unicode(catalog), utils.catalog_uri())

    # language field
    expected_language = URIRef(catalog_dict['language'])
    assert self._triple(graph, catalog, DCT.language, expected_language)
def graph_from_dataset(self, dataset_dict, dataset_ref):
    '''
    Add the triples describing a dataset to the profile graph (`self.g`)

    Two nodes are described: `dataset_ref` itself, typed as a
    VOID.DatasetDescription (the metadata record), and
    `dataset_ref` + '#dataset', typed as a DCAT.Dataset (the dataset
    proper). Title, landing page, notes, DOI, tags, dates, contact point,
    update frequency, licence, categories, temporal extent, author and
    contributors are then attached to the latter, and finally
    `self.graph_add_resources()` is called.

    Reads `dataset_dict['name']`, `dataset_dict['creator_user_id']` and
    `dataset_dict['dataset_category']` by subscript, so a dict missing any
    of them raises KeyError. Nothing is returned.
    '''
    namespaces = {
        'dc': DC,
        'dcat': DCAT,
        'adms': ADMS,
        'vcard': VCARD,
        'foaf': FOAF,
        'schema': SCHEMA,
        'time': TIME,
        'skos': SKOS,
        'locn': LOCN,
        'gsp': GSP,
        'owl': OWL,
        'tdwgi': TDWGI,
        'aiiso': AIISO,
        'mads': MADS,
        'void': VOID,
        'cc': CC,
        'org': ORG
    }
    g = self.g
    context = self.get_context()

    # Add some more namespaces
    for prefix, namespace in namespaces.iteritems():
        g.bind(prefix, namespace)

    # Add #dataset to the dataset URI to denote the conceptual object - the actual dataset
    # Without #dataset is the metadata - and that needs CC0 for BBC res
    dataset_uri = URIRef(dataset_ref + '#dataset')

    # Add dataset description (NB: This isn't the dataset - this is the dataset metadata)
    dataset_metadata_uri = URIRef(dataset_ref)
    g.add((dataset_metadata_uri, RDF.type, VOID.DatasetDescription))
    g.add((dataset_metadata_uri, CC.license, URIRef(METADATA_LICENCE)))
    # This metadata describes #dataset
    g.add((dataset_metadata_uri, FOAF.primaryTopic, dataset_uri))

    # If it is possible to access the RDF via dataset name, not uuid
    # In which case add a sameAs for the dataset name uri
    # NOTE(review): if 'CKAN_CURRENT_URL' is absent from request.environ,
    # .get() returns None and the `in` test raises TypeError
    if dataset_dict['name'] in request.environ.get('CKAN_CURRENT_URL'):
        alt_dataset_uri = '{0}/dataset/{1}'.format(catalog_uri().rstrip('/'), dataset_dict['name'])
        # Add a sameAs link
        g.add((dataset_metadata_uri, OWL.sameAs, URIRef(alt_dataset_uri)))

    # And now we can describe the dataset itself
    g.add((dataset_uri, RDF.type, DCAT.Dataset))

    # Basic fields
    items = [
        ('title', DC.title, None, Literal),
        ('url', DCAT.landingPage, None, URIRef),
    ]
    self._add_triples_from_dict(dataset_dict, dataset_uri, items)

    if dataset_dict.get('notes', None):
        g.add((dataset_uri, DCAT.description, Literal(dataset_dict['notes'])))

    # Add DOI
    doi = dataset_dict.get('doi', None)
    if doi:
        g.set((dataset_uri, DC.identifier, URIRef(doi)))

    # Tags
    for tag in dataset_dict.get('tags', []):
        g.add((dataset_uri, DCAT.keyword, Literal(tag['name'])))

    # Dates
    items = [
        ('issued', DC.issued, ['metadata_created'], Literal),
        ('modified', DC.modified, ['metadata_modified'], Literal),
    ]
    self._add_date_triples_from_dict(dataset_dict, dataset_uri, items)

    # We don't have maintainers - whoever added it to the portal is the maintainer
    creator_user_id = dataset_dict['creator_user_id']
    user = toolkit.get_action('user_show')(context, {
        'id': creator_user_id,
    })

    # Add publisher
    nhm_uri = self.graph_add_museum()
    if user:
        # if this is the admin user, just add the contactPoint
        if user['sysadmin'] and user['name'] == 'admin':
            g.add((dataset_uri, DCAT.contactPoint, nhm_uri))
        else:
            user_uri = URIRef(self.user_uri(creator_user_id))
            g.add((user_uri, RDF.type, VCARD.Person))
            if 'fullname' in user:
                g.add((user_uri, VCARD.fn, Literal(user['fullname'])))
            if 'email' in user:
                g.add((user_uri, VCARD.hasEmail, URIRef(user['email'])))
            # All users are members of the NHM
            g.add((user_uri, MADS.hasAffiliation, nhm_uri))
            # This user is the contact point for the dataset
            g.add((dataset_uri, DCAT.contactPoint, user_uri))

    # Add update frequency
    update_frequency = dataset_dict.get('update_frequency', None)
    if update_frequency:
        code = self._get_update_frequency_code(update_frequency)
        if code:
            g.set((dataset_uri, DC.accrualPeriodicity, URIRef(SDMX_CODE[code])))

    # Add licence - use URL if we have it
    # Otherwise try using the licence title
    if dataset_dict.get('license_url', None):
        g.set((dataset_uri, DC.license, URIRef(dataset_dict['license_url'])))
    elif dataset_dict.get('license_title', None):
        g.set((dataset_uri, DC.license, Literal(dataset_dict['license_title'])))

    # Add categories
    # Each category becomes a blank SKOS.Concept node linked to the dataset
    # as a DCAT.theme
    for category in dataset_dict['dataset_category']:
        # print category
        n = BNode()
        g.add((n, rdflib.RDF.type, SKOS.Concept))
        g.add((n, SKOS.prefLabel, Literal(category)))
        g.add((dataset_uri, DCAT.theme, n))

    # Temporal extent
    temporal_extent = dataset_dict.get('temporal_extent', None)
    if temporal_extent:
        g.add((dataset_uri, DC.temporal, Literal(temporal_extent)))

    # Author: the museum itself, or a blank VCARD.Person node
    author = dataset_dict.get('author', None)
    if author:
        if author == 'Natural History Museum':
            g.add((dataset_uri, DC.creator, nhm_uri))
        else:
            author_details = BNode()
            g.add((author_details, VCARD.fn, Literal(author)))
            if dataset_dict.get('author_email', None):
                g.add((author_details, VCARD.hasEmail, Literal(dataset_dict['author_email'])))
            g.add((author_details, RDF.type, VCARD.Person))
            g.add((dataset_uri, DC.creator, author_details))
            # NOTE(review): the affiliation handling is laid out inside this
            # branch because `author_details` only exists here - confirm
            # against the original indentation
            affiliation = dataset_dict.get('affiliation', None)
            if affiliation:
                if affiliation == 'Natural History Museum':
                    g.add((author_details, MADS.hasAffiliation, nhm_uri))
                else:
                    g.add((author_details, MADS.hasAffiliation, Literal(affiliation)))

    contributors = dataset_dict.get('contributors', None)
    if contributors:
        g.add((dataset_uri, DC.contributor, Literal(contributors)))

    self.graph_add_resources(dataset_uri, dataset_dict)
def user_uri(id):
    '''
    Build the URI that identifies a user

    The value is `catalog_uri()` (without trailing slashes) + '/user/' +
    the given `id`.

    Returns a string with the user URI.
    '''
    base = catalog_uri().rstrip('/')
    return '{0}/user/{1}'.format(base, id)
def graph_from_dataset(self, dataset_dict, dataset_ref):
    '''
    Adjust the dataset node in the profile graph (`self.g`) with the
    DCAT-AP_IT specific triples

    The dataset is additionally typed as DCATAPIT.Dataset; themes,
    languages, periodicity, landing page, conformsTo, publisher and
    contact point triples already in the graph are removed and/or
    replaced, and spatial, rights holder and creator information is
    added. Finally the owning organization is looked up through the
    `organization_show` action.

    NOTE(review): `title`, `holder_ref` and `org_dict` are assigned but
    not used in the code visible here; the function presumably continues
    beyond this excerpt - confirm.
    '''
    title = dataset_dict.get('title')

    g = self.g

    for prefix, namespace in it_namespaces.iteritems():
        g.bind(prefix, namespace)

    ### add a further type for the Dataset node
    g.add((dataset_ref, RDF.type, DCATAPIT.Dataset))

    ### replace themes
    # The stored value is split on ',' and curly braces are stripped from
    # each entry before it is appended to THEME_BASE_URI
    value = self._get_dict_value(dataset_dict, 'theme')
    if value:
        for theme in value.split(','):
            self.g.remove((dataset_ref, DCAT.theme, URIRef(theme)))
            theme = theme.replace('{', '').replace('}', '')
            self.g.add(
                (dataset_ref, DCAT.theme, URIRef(THEME_BASE_URI + theme)))
            self._add_concept(THEME_CONCEPTS, theme)
    else:
        # No theme set: fall back to the default theme key
        self.g.add((dataset_ref, DCAT.theme, URIRef(THEME_BASE_URI + DEFAULT_THEME_KEY)))
        self._add_concept(THEME_CONCEPTS, DEFAULT_THEME_KEY)

    ### replace languages
    value = self._get_dict_value(dataset_dict, 'language')
    if value:
        for lang in value.split(','):
            self.g.remove((dataset_ref, DCT.language, Literal(lang)))
            lang = lang.replace('{', '').replace('}', '')
            self.g.add(
                (dataset_ref, DCT.language, URIRef(LANG_BASE_URI + lang)))
            # self._add_concept(LANG_CONCEPTS, lang)

    ### add spatial (EU URI)
    value = self._get_dict_value(dataset_dict, 'geographical_name')
    if value:
        for gname in value.split(','):
            gname = gname.replace('{', '').replace('}', '')

            dct_location = BNode()
            self.g.add((dataset_ref, DCT.spatial, dct_location))

            self.g.add((dct_location, RDF['type'], DCT.Location))

            # Try and add a Concept from the spatial vocabulary
            if self._add_concept(GEO_CONCEPTS, gname):
                self.g.add((dct_location, DCATAPIT.geographicalIdentifier, Literal(GEO_BASE_URI + gname)))

                # geo concept is not really required, but may be a useful addition
                self.g.add((dct_location, LOCN.geographicalName, URIRef(GEO_BASE_URI + gname)))
            else:
                # The dataset field is not a controlled tag, let's create a Concept out of the label we have
                concept = BNode()
                self.g.add((concept, RDF['type'], SKOS.Concept))
                self.g.add((concept, SKOS.prefLabel, Literal(gname)))
                self.g.add((dct_location, LOCN.geographicalName, concept))

    ### add spatial (GeoNames)
    value = self._get_dict_value(dataset_dict, 'geographical_geonames_url')
    if value:
        dct_location = BNode()
        self.g.add((dataset_ref, DCT.spatial, dct_location))

        self.g.add((dct_location, RDF['type'], DCT.Location))
        self.g.add((dct_location, DCATAPIT.geographicalIdentifier, Literal(value)))

    ### replace periodicity
    self._remove_node(dataset_dict, dataset_ref, ('frequency', DCT.accrualPeriodicity, None, Literal))
    self._add_uri_node(
        dataset_dict, dataset_ref,
        ('frequency', DCT.accrualPeriodicity, DEFAULT_FREQ_CODE, URIRef),
        FREQ_BASE_URI)
    # self._add_concept(FREQ_CONCEPTS, dataset_dict.get('frequency', DEFAULT_VOCABULARY_KEY))

    ### replace landing page
    self._remove_node(dataset_dict, dataset_ref, ('url', DCAT.landingPage, None, URIRef))
    landing_page_uri = None
    if dataset_dict.get('name'):
        landing_page_uri = '{0}/dataset/{1}'.format(
            catalog_uri().rstrip('/'), dataset_dict['name'])
    else:
        landing_page_uri = dataset_uri(
            dataset_dict)  # TODO: preserve original URI if harvested

    self.g.add((dataset_ref, DCAT.landingPage, URIRef(landing_page_uri)))

    ### conformsTo
    # One blank Standard node per comma-separated entry
    self.g.remove((dataset_ref, DCT.conformsTo, None))
    value = self._get_dict_value(dataset_dict, 'conforms_to')
    if value:
        for item in value.split(','):
            standard = BNode()
            self.g.add((dataset_ref, DCT.conformsTo, standard))

            self.g.add((standard, RDF['type'], DCT.Standard))
            self.g.add((standard, RDF['type'], DCATAPIT.Standard))
            self.g.add((standard, DCT.identifier, Literal(item)))

    ### publisher

    # DCAT by default creates this node
    # <dct:publisher>
    #   <foaf:Organization rdf:about="http://10.10.100.75/organization/55535226-f82a-4cf7-903a-3e10afeaa79a">
    #     <foaf:name>orga2_test</foaf:name>
    #   </foaf:Organization>
    # </dct:publisher>

    for s, p, o in g.triples((dataset_ref, DCT.publisher, None)):
        #log.info("Removing publisher %r", o)
        g.remove((s, p, o))

    self._add_agent(dataset_dict, dataset_ref, 'publisher', DCT.publisher)

    ### Rights holder : Agent
    holder_ref = self._add_agent(dataset_dict, dataset_ref, 'holder', DCT.rightsHolder)

    ### Author : Agent
    self._add_agent(dataset_dict, dataset_ref, 'creator', DCT.creator)

    ### Point of Contact

    # <dcat:contactPoint rdf:resource="http://dati.gov.it/resource/PuntoContatto/contactPointRegione_r_liguri"/>

    # <!-- http://dati.gov.it/resource/PuntoContatto/contactPointRegione_r_liguri -->
    # <dcatapit:Organization rdf:about="http://dati.gov.it/resource/PuntoContatto/contactPointRegione_r_liguri">
    #    <rdf:type rdf:resource="&vcard;Kind"/>
    #    <rdf:type rdf:resource="&vcard;Organization"/>
    #    <vcard:hasEmail rdf:resource="mailto:<address>"/>
    #    <vcard:fn>Regione Liguria - Sportello Cartografico</vcard:fn>
    # </dcatapit:Organization>

    # TODO: preserve original info if harvested

    # retrieve the contactPoint added by the euro serializer
    euro_poc = g.value(subject=dataset_ref, predicate=DCAT.contactPoint, object=None, any=False)

    # euro poc has this format:
    # <dcat:contactPoint>
    #    <vcard:Organization rdf:nodeID="Nfcd06f452bcd41f48f33c45b0c95979e">
    #       <vcard:fn>THE ORGANIZATION NAME</vcard:fn>
    #       <vcard:hasEmail>THE ORGANIZATION EMAIL</vcard:hasEmail>
    #    </vcard:Organization>
    # </dcat:contactPoint>

    if euro_poc:
        g.remove((dataset_ref, DCAT.contactPoint, euro_poc))

    org_id = dataset_dict.get('organization', {}).get('id')

    # get orga info
    org_show = logic.get_action('organization_show')

    try:
        org_dict = org_show({}, {
            'id': org_id,
            'include_datasets': False,
            'include_tags': False,
            'include_users': False,
            'include_groups': False,
            'include_extras': True,
            'include_followers': False
        })
    except Exception, e:
        # Any failure of the lookup is silently treated as "no organization"
        org_dict = {}
def update_config(self, config):
    '''
    Register the extension `templates` directory with CKAN and call
    `catalog_uri()` once at configuration time.
    '''
    toolkit = p.toolkit
    toolkit.add_template_directory(config, 'templates')

    # Check catalog URI on startup to emit a warning if necessary
    catalog_uri()
def graph_from_dataset(self, dataset_dict, dataset_ref):
    '''
    Adjust the dataset node in the profile graph (`self.g`) with the
    DCAT-AP_IT specific triples

    The dataset is additionally typed as DCATAPIT.Dataset; themes,
    languages, periodicity, landing page, conformsTo, alternate
    identifiers, publisher and contact point triples already in the graph
    are removed and/or replaced, and spatial, temporal coverage and
    creator information is added. Finally the owning organization
    (`owner_org`) is looked up through the `organization_show` action.

    NOTE(review): `title`, `publisher_ref` and `org_dict` are assigned but
    not used in the code visible here; the function presumably continues
    beyond this excerpt - confirm.
    '''
    title = dataset_dict.get('title')

    g = self.g

    for prefix, namespace in it_namespaces.iteritems():
        g.bind(prefix, namespace)

    ### add a further type for the Dataset node
    g.add((dataset_ref, RDF.type, DCATAPIT.Dataset))

    ### replace themes
    value = self._get_dict_value(dataset_dict, 'theme')
    self._add_themes(dataset_ref, value)

    ### replace languages
    # The stored value is split on ',' and curly braces are stripped from
    # each entry before it is appended to LANG_BASE_URI
    value = self._get_dict_value(dataset_dict, 'language')
    if value:
        for lang in value.split(','):
            self.g.remove((dataset_ref, DCT.language, Literal(lang)))
            lang = lang.replace('{', '').replace('}', '')
            self.g.add(
                (dataset_ref, DCT.language, URIRef(LANG_BASE_URI + lang)))
            # self._add_concept(LANG_CONCEPTS, lang)

    ### add spatial (EU URI)
    value = self._get_dict_value(dataset_dict, 'geographical_name')
    if value:
        for gname in value.split(','):
            gname = gname.replace('{', '').replace('}', '')

            dct_location = BNode()
            self.g.add((dataset_ref, DCT.spatial, dct_location))

            self.g.add((dct_location, RDF['type'], DCT.Location))

            # Try and add a Concept from the spatial vocabulary
            if self._add_concept(GEO_CONCEPTS, gname):
                self.g.add((dct_location, DCATAPIT.geographicalIdentifier, Literal(GEO_BASE_URI + gname)))

                # geo concept is not really required, but may be a useful addition
                self.g.add((dct_location, LOCN.geographicalName, URIRef(GEO_BASE_URI + gname)))
            else:
                # The dataset field is not a controlled tag, let's create a Concept out of the label we have
                concept = BNode()
                self.g.add((concept, RDF['type'], SKOS.Concept))
                self.g.add((concept, SKOS.prefLabel, Literal(gname)))
                self.g.add((dct_location, LOCN.geographicalName, concept))

    ### add spatial (GeoNames)
    value = self._get_dict_value(dataset_dict, 'geographical_geonames_url')
    if value:
        dct_location = BNode()
        self.g.add((dataset_ref, DCT.spatial, dct_location))

        self.g.add((dct_location, RDF['type'], DCT.Location))
        self.g.add((dct_location, DCATAPIT.geographicalIdentifier, Literal(value)))

    ### replace periodicity
    self._remove_node(dataset_dict, dataset_ref, ('frequency', DCT.accrualPeriodicity, None, Literal))
    self._add_uri_node(
        dataset_dict, dataset_ref,
        ('frequency', DCT.accrualPeriodicity, DEFAULT_FREQ_CODE, URIRef),
        FREQ_BASE_URI)
    # self._add_concept(FREQ_CONCEPTS, dataset_dict.get('frequency', DEFAULT_VOCABULARY_KEY))

    ### replace landing page
    self._remove_node(dataset_dict, dataset_ref, ('url', DCAT.landingPage, None, URIRef))
    landing_page_uri = None
    if dataset_dict.get('name'):
        landing_page_uri = '{0}/dataset/{1}'.format(
            catalog_uri().rstrip('/'), dataset_dict['name'])
    else:
        landing_page_uri = dataset_uri(
            dataset_dict)  # TODO: preserve original URI if harvested

    self.g.add((dataset_ref, DCAT.landingPage, URIRef(landing_page_uri)))

    ### conformsTo
    # The stored value is parsed as JSON; each item is read through its
    # 'uri', 'identifier', 'title', 'description' and
    # 'referenceDocumentation' keys
    self.g.remove((dataset_ref, DCT.conformsTo, None))
    value = self._get_dict_value(dataset_dict, 'conforms_to')
    if value:
        try:
            conforms_to = json.loads(value)
        except (
                TypeError,
                ValueError,
        ):
            log.warn("Cannot deserialize DCATAPIT:conformsTo value: %s", value)
            conforms_to = []

        for item in conforms_to:
            # Use the item's own URI when it has one, a blank node otherwise
            standard = URIRef(item['uri']) if item.get('uri') else BNode()

            self.g.add((dataset_ref, DCT.conformsTo, standard))
            self.g.add((standard, RDF['type'], DCT.Standard))
            self.g.add((standard, RDF['type'], DCATAPIT.Standard))

            self.g.add(
                (standard, DCT.identifier, Literal(item['identifier'])))

            # Only languages listed in OFFERED_LANGS are serialized
            for lang, val in (item.get('title') or {}).items():
                if lang in OFFERED_LANGS:
                    self.g.add(
                        (standard, DCT.title,
                         Literal(val,
                                 lang=lang_mapping_ckan_to_xmllang.get(
                                     lang, lang))))

            for lang, val in (item.get('description') or {}).items():
                if lang in OFFERED_LANGS:
                    self.g.add(
                        (standard, DCT.description,
                         Literal(val,
                                 lang=lang_mapping_ckan_to_xmllang.get(
                                     lang, lang))))

            for reference_document in (item.get('referenceDocumentation')
                                       or []):
                self.g.add((standard, DCATAPIT.referenceDocumentation,
                            URIRef(reference_document)))

    ### ADMS:identifier alternative identifiers
    self.g.remove((
        dataset_ref,
        ADMS.identifier,
        None,
    ))
    try:
        alt_ids = json.loads(dataset_dict['alternate_identifier'])
    except (
            KeyError,
            TypeError,
            ValueError,
    ):
        # Missing or unparsable value: no alternate identifiers
        alt_ids = []

    for alt_identifier in alt_ids:
        node = BNode()
        self.g.add((dataset_ref, ADMS.identifier, node))

        identifier = Literal(alt_identifier['identifier'])
        self.g.add((node, SKOS.notation, identifier))

        if alt_identifier.get('agent'):
            adata = alt_identifier['agent']
            agent = BNode()

            self.g.add((agent, RDF['type'], DCATAPIT.Agent))
            self.g.add((agent, RDF['type'], FOAF.Agent))
            self.g.add((node, DCT.creator, agent))
            if adata.get('agent_name'):
                for alang, aname in adata['agent_name'].items():
                    self.g.add((agent, FOAF.name, Literal(aname,
                                                          lang=alang)))

            if adata.get('agent_identifier'):
                self.g.add((agent, DCT.identifier,
                            Literal(adata['agent_identifier'])))

    self._set_temporal_coverage(self.g, dataset_dict, dataset_ref)

    ### publisher

    # DCAT by default creates this node
    # <dct:publisher>
    #   <foaf:Organization rdf:about="http://10.10.100.75/organization/55535226-f82a-4cf7-903a-3e10afeaa79a">
    #     <foaf:name>orga2_test</foaf:name>
    #   </foaf:Organization>
    # </dct:publisher>

    for s, p, o in g.triples((dataset_ref, DCT.publisher, None)):
        #log.info("Removing publisher %r", o)
        g.remove((s, p, o))

    publisher_ref = self._add_agent(dataset_dict,
                                    dataset_ref,
                                    'publisher',
                                    DCT.publisher,
                                    use_default_lang=True)

    ### Author : Agent
    self._add_creators(dataset_dict, dataset_ref)

    ### Point of Contact

    # <dcat:contactPoint rdf:resource="http://dati.gov.it/resource/PuntoContatto/contactPointRegione_r_liguri"/>

    # <!-- http://dati.gov.it/resource/PuntoContatto/contactPointRegione_r_liguri -->
    # <dcatapit:Organization rdf:about="http://dati.gov.it/resource/PuntoContatto/contactPointRegione_r_liguri">
    #    <rdf:type rdf:resource="&vcard;Kind"/>
    #    <rdf:type rdf:resource="&vcard;Organization"/>
    #    <vcard:hasEmail rdf:resource="mailto:<address>"/>
    #    <vcard:fn>Regione Liguria - Sportello Cartografico</vcard:fn>
    # </dcatapit:Organization>

    # TODO: preserve original info if harvested

    # retrieve the contactPoint added by the euro serializer
    euro_poc = g.value(subject=dataset_ref,
                       predicate=DCAT.contactPoint,
                       object=None,
                       any=False)

    # euro poc has this format:
    # <dcat:contactPoint>
    #    <vcard:Organization rdf:nodeID="Nfcd06f452bcd41f48f33c45b0c95979e">
    #       <vcard:fn>THE ORGANIZATION NAME</vcard:fn>
    #       <vcard:hasEmail>THE ORGANIZATION EMAIL</vcard:hasEmail>
    #    </vcard:Organization>
    # </dcat:contactPoint>

    if euro_poc:
        g.remove((dataset_ref, DCAT.contactPoint, euro_poc))

    org_id = dataset_dict.get('owner_org')

    # get orga info
    org_show = logic.get_action('organization_show')

    org_dict = {}
    if org_id:
        try:
            org_dict = org_show({'ignore_auth': True}, {
                'id': org_id,
                'include_datasets': False,
                'include_tags': False,
                'include_users': False,
                'include_groups': False,
                'include_extras': True,
                'include_followers': False
            })
        except Exception, err:
            # Best effort: on failure org_dict stays empty and a warning is logged
            log.warning("Cannot get org for %s: %s",
                        org_id,
                        err,
                        exc_info=err)