def test_all_resource_fields(self):
    '''Every mapped property of a resource must appear in the RDF output.'''
    lic = LicenseFactory()
    res = ResourceFactory(format='csv')
    ds = DatasetFactory(resources=[res], license=lic)
    permalink = url_for('datasets.resource', id=res.id, _external=True)

    node = resource_to_rdf(res, ds)

    # Plain literal properties
    assert node.value(DCT.title) == Literal(res.title)
    assert node.value(DCT.description) == Literal(res.description)
    assert node.value(DCT.issued) == Literal(res.published)
    assert node.value(DCT.modified) == Literal(res.modified)
    assert node.value(DCAT.bytesSize) == Literal(res.filesize)
    assert node.value(DCAT.mediaType) == Literal(res.mime)
    assert node.value(DCT.term('format')) == Literal(res.format)

    # License is exposed both as a URI and as a rights literal
    assert node.value(DCT.license).identifier == URIRef(lic.url)
    assert node.value(DCT.rights) == Literal(lic.title)

    # Download URL points to the file, access URL to the permalink
    assert node.value(DCAT.downloadURL).identifier == URIRef(res.url)
    assert node.value(DCAT.accessURL).identifier == URIRef(permalink)

    # Checksum is serialized as a nested SPDX resource
    checksum = node.value(SPDX.checksum)
    assert node.graph.value(checksum.identifier, RDF.type) == SPDX.Checksum
    algorithm = node.graph.value(checksum.identifier, SPDX.algorithm)
    assert algorithm == SPDX.checksumAlgorithm_sha1
    assert checksum.value(SPDX.checksumValue) == Literal(res.checksum.value)
def test_resource_title_from_format(self):
    '''The fallback title is built from the format when the URL has no filename.'''
    distrib = BNode()
    graph = Graph()
    # Trailing slash: no extension-bearing last segment to use as a title
    no_name_url = 'https://www.somewhere.com/no-extension/'
    for triple in (
        (distrib, RDF.type, DCAT.Distribution),
        (distrib, DCAT.downloadURL, URIRef(no_name_url)),
        (distrib, DCT.term('format'), Literal('CSV')),
    ):
        graph.set(triple)

    resource = resource_from_rdf(graph)
    resource.validate()

    assert resource.title == 'csv resource'
def resource_from_rdf(graph_or_distrib, dataset=None):
    '''
    Map a DCAT/RDF distribution to a Resource domain model.

    ``graph_or_distrib`` is either an already extracted ``RdfResource``
    or a whole RDF graph, in which case the first ``dcat:Distribution``
    node is used. When ``dataset`` is given, an existing resource with
    the same URL is updated in place; otherwise a new ``Resource`` is
    created (and appended to the dataset, if any).
    '''
    if isinstance(graph_or_distrib, RdfResource):
        distrib = graph_or_distrib
    else:
        node = graph_or_distrib.value(predicate=RDF.type,
                                      object=DCAT.Distribution)
        distrib = graph_or_distrib.resource(node)

    # Prefer the direct download URL, fall back on the access URL
    download_url = url_from_rdf(distrib, DCAT.downloadURL)
    access_url = url_from_rdf(distrib, DCAT.accessURL)
    url = safe_unicode(download_url or access_url)

    # Match an existing resource by URL to update it instead of duplicating
    if dataset:
        resource = get_by(dataset.resources, 'url', url)
    if not dataset or not resource:
        resource = Resource()
        if dataset:
            dataset.resources.append(resource)
    resource.title = title_from_rdf(distrib, url)
    resource.url = url
    resource.description = sanitize_html(distrib.value(DCT.description))
    resource.filesize = rdf_value(distrib, DCAT.bytesSize)
    resource.mime = rdf_value(distrib, DCAT.mediaType)
    fmt = rdf_value(distrib, DCT.term('format'))
    if fmt:
        resource.format = fmt.lower()
    checksum = distrib.value(SPDX.checksum)
    if checksum:
        # Guard against a checksum node without an algorithm triple,
        # which would otherwise raise AttributeError on `.identifier`
        algorithm_node = checksum.value(SPDX.algorithm)
        algorithm = algorithm_node.identifier if algorithm_node else None
        algorithm = CHECKSUM_ALGORITHMS.get(algorithm)
        if algorithm:
            resource.checksum = Checksum()
            resource.checksum.value = rdf_value(checksum, SPDX.checksumValue)
            resource.checksum.type = algorithm

    # Keep the current values when the distribution does not provide dates
    resource.published = rdf_value(distrib, DCT.issued, resource.published)
    resource.modified = rdf_value(distrib, DCT.modified, resource.modified)

    identifier = rdf_value(distrib, DCT.identifier)
    if identifier:
        resource.extras['dct:identifier'] = identifier

    # Remember the remote URI when the node is not a blank node
    if isinstance(distrib.identifier, URIRef):
        resource.extras['uri'] = distrib.identifier.toPython()

    return resource
def test_all_resource_fields(self):
    '''A fully populated distribution fills every resource attribute.'''
    title = faker.sentence()
    url = faker.uri()
    description = faker.paragraph()
    filesize = faker.pyint()
    issued = faker.date_time_between(start_date='-60d', end_date='-30d')
    modified = faker.past_datetime(start_date='-30d')
    mime = faker.mime_type()
    sha1 = faker.sha1()

    node = BNode()
    g = Graph()
    # Distribution triples sharing the same subject node
    for predicate, obj in (
        (RDF.type, DCAT.Distribution),
        (DCT.title, Literal(title)),
        (DCT.description, Literal(description)),
        (DCAT.downloadURL, Literal(url)),
        (DCT.issued, Literal(issued)),
        (DCT.modified, Literal(modified)),
        (DCAT.bytesSize, Literal(filesize)),
        (DCAT.mediaType, Literal(mime)),
        (DCT.term('format'), Literal('CSV')),
    ):
        g.add((node, predicate, obj))

    # Checksum is attached as a nested SPDX node
    checksum = BNode()
    g.add((node, SPDX.checksum, checksum))
    g.add((checksum, RDF.type, SPDX.Checksum))
    g.add((checksum, SPDX.algorithm, SPDX.checksumAlgorithm_sha1))
    g.add((checksum, SPDX.checksumValue, Literal(sha1)))

    resource = resource_from_rdf(g)
    resource.validate()

    assert isinstance(resource, Resource)
    assert resource.title == title
    assert resource.url == url
    assert resource.description == description
    assert resource.filesize == filesize
    assert resource.mime == mime
    assert resource.published == issued
    assert resource.modified == modified
    assert resource.format == 'csv'
    assert isinstance(resource.checksum, Checksum)
    assert resource.checksum.type == 'sha1'
    assert resource.checksum.value == sha1
def resource_to_rdf(resource, dataset=None, graph=None):
    '''
    Map a Resource domain model to a DCAT/RDF graph
    '''
    # NB: `graph or ...` is deliberate — it also replaces an empty graph,
    # matching the historical behavior (empty rdflib graphs are falsy).
    graph = graph or Graph(namespace_manager=namespace_manager)

    # Resources of a persisted dataset are identified by a URI anchored
    # on the dataset page; orphan resources fall back on a blank node.
    if dataset and dataset.id:
        anchor = 'resource-{0}'.format(resource.id)
        node_id = URIRef(url_for('datasets.show_redirect',
                                 dataset=dataset.id,
                                 _external=True,
                                 _anchor=anchor))
    else:
        node_id = BNode(resource.id)
    permalink = url_for('datasets.resource', id=resource.id, _external=True)

    distrib = graph.resource(node_id)
    distrib.set(RDF.type, DCAT.Distribution)
    distrib.set(DCT.identifier, Literal(resource.id))

    # Properties that are always serialized
    for predicate, value in (
        (DCT.title, Literal(resource.title)),
        (DCT.description, Literal(resource.description)),
        (DCAT.downloadURL, URIRef(resource.url)),
        (DCAT.accessURL, URIRef(permalink)),
        (DCT.issued, Literal(resource.published)),
        (DCT.modified, Literal(resource.modified)),
    ):
        distrib.add(predicate, value)

    # License metadata comes from the parent dataset when available
    if dataset and dataset.license:
        distrib.add(DCT.rights, Literal(dataset.license.title))
        if dataset.license.url:
            distrib.add(DCT.license, URIRef(dataset.license.url))

    # Optional file metadata
    if resource.filesize is not None:
        distrib.add(DCAT.bytesSize, Literal(resource.filesize))
    if resource.mime:
        distrib.add(DCAT.mediaType, Literal(resource.mime))
    if resource.format:
        distrib.add(DCT.term('format'), Literal(resource.format))

    # Checksum is serialized as a nested SPDX resource
    if resource.checksum:
        checksum = graph.resource(BNode())
        checksum.set(RDF.type, SPDX.Checksum)
        algorithm = 'checksumAlgorithm_{0}'.format(resource.checksum.type)
        checksum.add(SPDX.algorithm, getattr(SPDX, algorithm))
        checksum.add(SPDX.checksumValue, Literal(resource.checksum.value))
        distrib.add(SPDX.checksum, checksum)

    return distrib
def title_from_rdf(rdf, url):
    '''
    Try to extract a distribution title from a property.

    As it's not a mandatory property, it falls back on building a title
    from the URL, then from the format, and as a last resort a generic
    resource name.
    '''
    explicit_title = rdf_value(rdf, DCT.title)
    if explicit_title:
        return explicit_title
    # A URL whose last segment looks like a filename (contains a dot,
    # no query string) is good enough to serve as a title
    if url:
        candidate = url.rsplit('/', 1)[-1]
        if '.' in candidate and '?' not in candidate:
            return candidate
    fmt = rdf_value(rdf, DCT.term('format'))
    lang = current_app.config['DEFAULT_LANGUAGE']
    with i18n.language(lang):
        if not fmt:
            return i18n._('Nameless resource')
        return i18n._('{format} resource').format(format=fmt.lower())
def resource_to_rdf(resource, dataset=None, graph=None):
    '''
    Map a Resource domain model to a DCAT/RDF graph
    '''
    graph = graph or Graph(namespace_manager=namespace_manager)

    # Identify the distribution: a URI anchored on the dataset page when
    # the dataset is persisted, a blank node otherwise.
    if dataset and dataset.id:
        page = url_for('datasets.show_redirect', dataset=dataset.id,
                       _external=True,
                       _anchor='resource-{0}'.format(resource.id))
        subject = URIRef(page)
    else:
        subject = BNode(resource.id)
    permalink = url_for('datasets.resource', id=resource.id, _external=True)

    d = graph.resource(subject)
    d.set(RDF.type, DCAT.Distribution)
    d.set(DCT.identifier, Literal(resource.id))
    d.add(DCT.title, Literal(resource.title))
    d.add(DCT.description, Literal(resource.description))
    d.add(DCAT.downloadURL, URIRef(resource.url))
    d.add(DCAT.accessURL, URIRef(permalink))
    d.add(DCT.issued, Literal(resource.published))
    d.add(DCT.modified, Literal(resource.modified))

    # License information is inherited from the parent dataset
    lic = dataset.license if dataset else None
    if lic:
        d.add(DCT.rights, Literal(lic.title))
        if lic.url:
            d.add(DCT.license, URIRef(lic.url))

    # Optional file metadata, serialized only when present
    if resource.filesize is not None:
        d.add(DCAT.bytesSize, Literal(resource.filesize))
    if resource.mime:
        d.add(DCAT.mediaType, Literal(resource.mime))
    if resource.format:
        d.add(DCT.term('format'), Literal(resource.format))

    # Checksum is attached as a nested SPDX node
    checksum = resource.checksum
    if checksum:
        spdx_checksum = graph.resource(BNode())
        spdx_checksum.set(RDF.type, SPDX.Checksum)
        algo_name = 'checksumAlgorithm_{0}'.format(checksum.type)
        spdx_checksum.add(SPDX.algorithm, getattr(SPDX, algo_name))
        spdx_checksum.add(SPDX.checksumValue, Literal(checksum.value))
        d.add(SPDX.checksum, spdx_checksum)

    return d