def get(self, request, ptype):
    type_uri = rdflib.URIRef('http://id.clarosnet.org/type/object/%s' % ptype)
    types = ObjectCategoryView.get_object_types(self.endpoint)[1]
    for t in types:
        if t._identifier == type_uri:
            type_resource = t
            break
    else:
        # No known object type matches the requested slug.
        raise Http404
    graph = self.endpoint.query(self._query % type_uri.n3())
    subjects = graph.subjects(NS['crm'].P138i_has_representation)
    subjects = [Resource(s, graph, self.endpoint) for s in subjects]
    # Keep a random sample of at most 200 subjects.
    random.shuffle(subjects)
    subjects[200:] = []
    context = {
        'type': type_resource,
        'types': types,
        'graph': graph,
        'subjects': subjects,
        'queries': [graph.query],
    }
    return self.render(request, context, 'claros/objects')
def get_object_types(cls, endpoint):
    graph = endpoint.query(cls._query)
    subjects = graph.subjects(NS['crm'].P127_has_broader_term,
                              rdflib.URIRef('http://id.clarosnet.org/type/object'))
    subjects = [Resource(s, graph, endpoint) for s in subjects]
    subjects.sort(key=lambda s: s.rdfs_label)
    for subject in subjects:
        subject.slug = subject._identifier.split('/')[-1]
    return graph, subjects
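# Usage sketch (illustrative, not part of the original module): this is called
# as a classmethod from the view's get() above, which keeps only the Resource
# list. The `endpoint` name below is assumed to be an Endpoint instance.
#
#   graph, object_types = ObjectCategoryView.get_object_types(endpoint)
#   slugs = [t.slug for t in object_types]  # last path segment of each type URI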
def items(self):
    items = []
    for vacancy in self.graph.subjects(NS.rdf.type, NS.vacancy.Vacancy):
        descriptions = list(self.graph.objects(vacancy, NS.rdfs.comment))
        # Favour XHTML descriptions
        descriptions.sort(key=lambda description: description.datatype != NS.xtypes['Fragment-XHTML'])
        resource = Resource(vacancy, self.graph, self.endpoint)
        closing_date = self.graph.value(vacancy, NS.vacancy.applicationClosingDate)
        pubdate = dateutil.parser.parse(closing_date) if closing_date else None
        items.append({'title': self.graph.value(vacancy, NS.rdfs.label),
                      'description': descriptions[0] if descriptions else None,
                      'link': self.graph.value(vacancy, NS.foaf.homepage),
                      'date': pubdate,
                      'pubdate': pubdate})
        if self.all and resource.get('oo:organizationalUnit'):
            subdepts = [r.label for r in resource.get_all('oo:organizationalUnit')
                        if r.get('skos:prefLabel') and r._identifier != self.unit]
            if subdepts:
                items[-1]['title'] += " (%s)" % ', '.join(subdepts)
    items.sort(key=lambda item: (item['pubdate'], item['title']))
    return items
def node(obj):
    if isinstance(obj, BaseResource):
        return obj.render()
    elif isinstance(obj, URIRef):
        # Bare URIRefs are wrapped in a Resource (with an empty graph) so they
        # render the same way as full resources.
        return Resource(obj, ConjunctiveGraph(), None).render()
    elif isinstance(obj, Literal) and obj.datatype in (NS.xtypes['Fragment-HTML'],
                                                       NS.rdf['HTML'],
                                                       NS.xtypes['Fragment-XHTML']):
        # HTML-typed literals are sanitized rather than escaped.
        return humfrey.utils.templatetags.humfrey_sanitizer.sanitize_html(obj)
    elif isinstance(obj, Literal):
        # Plain literals are escaped, with line breaks preserved as <br/>.
        return mark_safe(escape(unicode(obj.toPython())).replace('\n', '<br/>\n'))
    else:
        return obj
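# Usage sketch (illustrative, not part of the original module): how node()
# treats a plain rdflib Literal. The literal value is made up; only the
# escape-and-<br/> behaviour is taken from the code above.
#
#   >>> from rdflib import Literal
#   >>> node(Literal('1 < 2\nand 2 < 3'))
#   u'1 &lt; 2<br/>\nand 2 &lt; 3'
#
# BaseResource and URIRef arguments are instead rendered via their Resource
# wrappers, and HTML-typed literals are passed through the sanitizer.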
def search_item_template(hit, default_search_item_template_name):
    types = set(t['uri'] for t in hit['source'].get('allTypes', ()))
    try:
        types.add(hit['source']['type']['uri'])
    except KeyError:
        pass
    graph = rdflib.ConjunctiveGraph()
    uri = rdflib.URIRef(hit['source']['uri'])
    for t in types:
        graph.add((uri, NS.rdf.type, rdflib.URIRef(t)))
    resource = Resource(uri, graph, None)
    template_name = (getattr(resource, 'search_item_template_name', None)
                     or default_search_item_template_name)
    return template_name + ".html"
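# Usage sketch (illustrative): the shape of an Elasticsearch-style hit this
# function expects. The URIs below are hypothetical; the returned name depends
# on whether the Resource class registered for those rdf:types defines a
# search_item_template_name attribute.
#
#   hit = {'source': {'uri': 'http://example.org/id/person/alice',
#                     'type': {'uri': 'http://xmlns.com/foaf/0.1/Person'},
#                     'allTypes': [{'uri': 'http://xmlns.com/foaf/0.1/Agent'}]}}
#   search_item_template(hit, 'results/search_item')
#   # -> 'results/search_item.html' unless a type-specific template overrides it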
def render_kml(self, request, context, template_name):
    if not isinstance(context.get('graph'), rdflib.ConjunctiveGraph):
        return NotImplemented
    graph = context['graph']
    subjects = set()
    for subject in set(graph.subjects()):
        subject = Resource(subject, graph, self.endpoint)
        if subject.geo_lat and subject.geo_long and isinstance(subject, rdflib.URIRef):
            subjects.add(subject)
    context['subjects'] = subjects
    return render_to_response('results/graph.kml', context,
                              context_instance=RequestContext(request),
                              mimetype='application/vnd.google-earth.kml+xml')
def get(self, request):
    additional_headers = {}

    # Apache helpfully(!?) unescapes encoded hash characters. If we get one
    # we know that the browser sent a '%23' (or else it would have been
    # stripped as a fragment identifier). We replace it with a '%23' so that
    # our URI canonicalisation doesn't get stuck in an endless redirect loop.
    doc_url = request.build_absolute_uri().replace('#', '%23')

    # Given a URL 'http://example.org/doc/foo.bar' we check whether 'foo'
    # has a type (ergo 'bar' is a format), and if not we assume that
    # 'foo.bar' is part of the URI.
    for formats in (None, ()):
        uri, format, is_local = doc_backward(doc_url, formats)
        if uri and not IRI.match(uri):
            raise Http404("Invalid IRI")
        if not uri:
            logger.debug("Could not resolve URL to a URI: %r", doc_url)
            raise Http404("Could not resolve URL to a URI")
        types = self.get_types(uri)
        if types:
            break
        doc_url = doc_url.rsplit('.', 1)[0]
    else:
        logger.debug("Resource has no type, so is probably not known in these parts: %r", uri)
        raise Http404("Resource has no type, so is probably not known in these parts")

    expected_doc_url = urlparse.urljoin(doc_url, doc_forward(uri, request, format=format, described=True))
    if self.check_canonical and expected_doc_url != doc_url:
        logger.debug("Request for a non-canonical doc URL (%r) for %r, redirecting to %r",
                     doc_url, uri, expected_doc_url)
        return HttpResponsePermanentRedirect(expected_doc_url)

    doc_uri = rdflib.URIRef(doc_forward(uri, request, format=None, described=True))

    self.context.update({
        'subject_uri': uri,
        'doc_uri': doc_uri,
        'format': format,
        'types': types,
        'show_follow_link': not is_local,
        'no_index': not is_local,
        'additional_headers': additional_headers,
    })

    subject_uri, doc_uri = self.context['subject_uri'], self.context['doc_uri']
    types = self.context['types']

    queries, graph = [], rdflib.ConjunctiveGraph()
    for prefix, namespace_uri in NS.iteritems():
        graph.namespace_manager.bind(prefix, namespace_uri)
    graph += ((subject_uri, NS.rdf.type, t) for t in types)

    subject = Resource(subject_uri, graph, self.endpoint)

    for query in subject.get_queries():
        graph += self.endpoint.query(query)
        queries.append(query)

    licenses, datasets = set(), set()
    for graph_name in graph.subjects(NS['ov'].describes):
        graph.add((doc_uri, NS['dcterms'].source, graph_name))
        licenses.update(graph.objects(graph_name, NS['dcterms'].license))
        datasets.update(graph.objects(graph_name, NS['void'].inDataset))
    if len(licenses) == 1:
        for license_uri in licenses:
            graph.add((doc_uri, NS['dcterms'].license, license_uri))

    if not graph:
        logger.debug("Graph for %r was empty; 404ing", uri)
        raise Http404("Graph was empty")

    self.template_name = subject.template_name or self.template_name
    for template_override in self.template_overrides:
        tn, types = template_override[0], template_override[1:]
        if set(subject._graph.objects(subject._identifier, NS.rdf.type)) & set(map(expand, types)):
            self.template_name = tn
            break

    self.context.update({
        'graph': graph,
        'subject': subject,
        'licenses': [Resource(uri, graph, self.endpoint) for uri in licenses],
        'datasets': [Resource(uri, graph, self.endpoint) for uri in datasets],
        'queries': map(self.endpoint.normalize_query, queries),
        'template_name': self.template_name,
    })

    self.set_renderers()

    for doc_rdf_processor in self._doc_rdf_processors:
        additional_context = doc_rdf_processor(self.request, self.context)
        if additional_context:
            self.context.update(additional_context)

    # If no format was given explicitly (i.e. format parameter or
    # extension) we inspect the Content-Type header.
    if not format:
        if request.renderers:
            format = request.renderers[0].format
        expected_doc_url = doc_forward(uri, request, format=format, described=True)
        if expected_doc_url != doc_url:
            additional_headers['Content-Location'] = expected_doc_url

    # NOTE: This getattrs every attr on subject, so would force memoization
    # on any cached attributes. We call it as late as possible so that the
    # graph won't change afterwards and invalidate those cached results.
    self.conneg += subject

    if self.context['format']:
        try:
            return self.render_to_format(format=format)
        except KeyError:
            raise Http404
    else:
        return self.render()
def get(self, request):
    additional_headers = {}

    doc_url = request.build_absolute_uri()
    uri, format, is_local = doc_backward(doc_url, set(self._renderers_by_format))

    if not uri:
        logger.debug("Could not resolve URL to a URI: %r", doc_url)
        raise Http404

    expected_doc_url = doc_forward(uri, request, format=format, described=True)

    types = self.get_types(uri)
    if not types:
        logger.debug("Resource has no type, so is probably not known in these parts: %r", uri)
        raise Http404

    if self.check_canonical and expected_doc_url != doc_url:
        logger.debug("Request for a non-canonical doc URL (%r) for %r, redirecting to %r",
                     doc_url, uri, expected_doc_url)
        return HttpResponsePermanentRedirect(expected_doc_url)

    # If no format was given explicitly (i.e. format parameter or
    # extension) we inspect the Content-Type header.
    if not format:
        renderers = self.get_renderers(request)
        if renderers:
            format = renderers[0].format
        expected_doc_url = doc_forward(uri, request, format=format, described=True)
        if expected_doc_url != doc_url:
            additional_headers['Content-Location'] = expected_doc_url

    doc_uri = rdflib.URIRef(doc_forward(uri, request, format=None, described=True))

    context = {
        'subject_uri': uri,
        'doc_uri': doc_uri,
        'format': format,
        'types': types,
        'show_follow_link': not is_local,
        'no_index': not is_local,
        'additional_headers': additional_headers,
    }

    subject_uri, doc_uri = context['subject_uri'], context['doc_uri']
    types = context['types']

    queries, graph = [], rdflib.ConjunctiveGraph()
    for prefix, namespace_uri in NS.iteritems():
        graph.namespace_manager.bind(prefix, namespace_uri)
    graph += ((subject_uri, NS.rdf.type, t) for t in types)

    subject = Resource(subject_uri, graph, self.endpoint)

    for query in subject.get_queries():
        graph += self.endpoint.query(query)
        queries.append(query)

    licenses, datasets = set(), set()
    for graph_name in graph.subjects(NS['ov'].describes):
        graph.add((doc_uri, NS['dcterms'].source, graph_name))
        licenses.update(graph.objects(graph_name, NS['dcterms'].license))
        datasets.update(graph.objects(graph_name, NS['void'].inDataset))
    if len(licenses) == 1:
        for license_uri in licenses:
            graph.add((doc_uri, NS['dcterms'].license, license_uri))

    if not graph:
        logger.debug("Graph for %r was empty; 404ing", uri)
        raise Http404

    for doc_rdf_processor in self._doc_rdf_processors:
        additional_context = doc_rdf_processor(request=request,
                                               graph=graph,
                                               doc_uri=doc_uri,
                                               subject_uri=subject_uri,
                                               subject=subject,
                                               endpoint=self.endpoint,
                                               renderers=self._renderers)
        if additional_context:
            context.update(additional_context)

    context.update({
        'graph': graph,
        'subject': subject,
        'licenses': [Resource(uri, graph, self.endpoint) for uri in licenses],
        'datasets': [Resource(uri, graph, self.endpoint) for uri in datasets],
        'queries': queries,
        'template_name': subject.template_name,
    })

    template_name = subject.template_name or self.template_name
    for template_override in self.template_overrides:
        tn, types = template_override[0], template_override[1:]
        if set(subject._graph.objects(subject._identifier, NS.rdf.type)) & set(map(expand, types)):
            template_name = tn
            break

    if context['format']:
        try:
            return self.render_to_format(request, context, template_name, format)
        except KeyError:
            raise Http404
    else:
        return self.render(request, context, template_name)
def get(self, request, root=None, base_type=None, graph=None, relations=None,
        template='graphviz/graphviz', depth=4, max_depth=5, excluded_types=None,
        properties=None, inverted=None, minimal=None):
    make_uriref = lambda uri: expand(uri) if uri else None

    root = make_uriref(root or request.GET.get('root'))
    base_type = make_uriref(base_type or request.GET.get('base_type'))
    graph = make_uriref(graph or request.GET.get('graph'))
    relations = relations or [expand(relation) for relation in request.GET.getlist('relation')]
    inverted = inverted if (inverted is not None) else request.GET.get('inverted') == 'true'
    minimal = minimal if (minimal is not None) else request.GET.get('minimal') == 'true'

    if not relations:
        raise Http404

    if inverted:
        relation_pattern = '?entity ?relation ?parent'
    else:
        relation_pattern = '?parent ?relation ?entity'

    if root and base_type:
        raise HttpBadRequest
    elif root:
        if not self.get_types(root):
            raise Http404
        if inverted:
            subj, obj = '?entity', root.n3()
        else:
            subj, obj = root.n3(), '?entity'
        try:
            depth = min(int(request.GET.get('depth', depth)), max_depth)
        except (TypeError, ValueError):
            return HttpResponseBadRequest()
        selector = self.tree_selector.format(subject=subj,
                                             object=obj,
                                             depth=depth,
                                             relationAlternation='|'.join(r.n3() for r in relations))
    elif base_type:
        selector = self.type_selector.format(graph=graph.n3() if graph else '?graph',
                                             baseType=base_type.n3())

    excluded_types = excluded_types or [expand(t) for t in request.GET.getlist('exclude_type')]
    properties = properties or [expand(p) for p in request.GET.getlist('property')]

    page_uri = rdflib.URIRef(request.build_absolute_uri())

    query = self.query.format(selector=selector,
                              relations=' '.join(r.n3() for r in relations),
                              excludedTypes=' '.join(t.n3() for t in excluded_types),
                              relationPattern=relation_pattern,
                              page_uri=page_uri.n3(),
                              propertyPatterns='\n '.join('OPTIONAL { ?entity %s ?p%s } .' % (p.n3(), i)
                                                          for i, p in enumerate(properties)),
                              propertyTriples=''.join(';\n %s ?p%s' % (p.n3(), i)
                                                      for i, p in enumerate(properties)))

    graph = self.endpoint.query(query)

    subjects = [Resource(s, graph, self.endpoint)
                for s in set(graph.objects(page_uri, NS['foaf'].topic))]
    subjects.sort(key=lambda s: s.label)

    subject = Resource(root, graph, self.endpoint) if root else None

    context = {
        'graph': graph,
        'queries': [graph.query],
        'subjects': subjects,
        'subject': subject,
        'inverted': inverted,
        'relations': relations,
        'minimal': minimal,
        'filename_base': slugify(subject.label if subject else 'graphviz')[:32],
    }

    for subject in subjects:
        if not inverted:
            subject.children = set(Resource(s, graph, self.endpoint)
                                   for relation in relations
                                   for s in graph.objects(subject._identifier, relation))
        else:
            subject.children = set(Resource(s, graph, self.endpoint)
                                   for relation in relations
                                   for s in graph.subjects(relation, subject._identifier))
        for child in subject.children:
            if (page_uri, NS['foaf'].topic, child._identifier) in graph:
                child.display = True

    return self.render(request, context, template)
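# Usage sketch (illustrative): the query parameters this view reads from
# request.GET. The URL and CURIEs below are hypothetical examples.
#
#   /graphviz?root=http://example.org/id/dept/humanities
#            &relation=org:hasPart&depth=3&inverted=false&minimal=true
#
# Either root or base_type should be supplied (supplying both is a bad
# request); at least one relation is required, and exclude_type and property
# may each be repeated.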
def upload_dataset_metadata(sender, store, graphs, when, **kwargs):
    if store.slug != DEFAULT_STORE_SLUG:
        return
    if not graphs:
        logger.debug("No graphs updated for %r; aborting", store.slug)
        return
    if not getattr(settings, 'CKAN_API_KEY', None):
        logger.debug("No CKAN_API_KEY setting, not doing anything.")
        return

    client = ckanclient.CkanClient(api_key=settings.CKAN_API_KEY)
    endpoint = Endpoint(settings.ENDPOINT_QUERY)

    query = _dataset_query % ' \n'.join('(%s)' % rdflib.URIRef(g).n3() for g in graphs)
    graph = endpoint.query(query)

    datasets = list(graph.subjects(NS.rdf.type, NS.void.Dataset))
    if len(datasets) != 1:
        logger.debug("Expected one dataset; got %d", len(datasets))
        return
    dataset = Resource(datasets[0], graph, endpoint)

    find = functools.partial(_find, graph, dataset._identifier)
    patterns = settings.CKAN_PATTERNS

    package_name = find('skos:notation', HUMFREY.theDataHubDatasetName)
    if not package_name:
        return
    #package_name = patterns.get('name', '%s') % slug
    package_title = patterns.get('title', '%s') % dataset.label

    author = find('dcterms:publisher/foaf:name|rdfs:label|dc:title|skos:prefLabel|dcterms:title')
    if author:
        author = patterns.get('author', '%s') % author

    description = find('rdfs:comment|dcterms:description',
                       (NS.xtypes['Fragment-Markdown'], NS.xtypes['Fragment-PlainText'], None))

    maintainer = find('oo:contact/foaf:name|rdfs:label|dc:title|skos:prefLabel|dcterms:title')
    if maintainer:
        maintainer = patterns.get('maintainer', '%s') % maintainer
    maintainer_email = find('oo:contact/foaf:mbox|v:email')
    if maintainer_email:
        maintainer_email = maintainer_email.replace('mailto:', '')

    license = find('dcterms:license|cc:license')
    if license:
        license = _licenses.get(unicode(license))

    sparql_endpoint = find('void:sparqlEndpoint')
    if sparql_endpoint:
        sparql_endpoint = unicode(sparql_endpoint)
    else:
        sparql_endpoint = 'http:' + reverse_full('data', 'sparql:endpoint')

    tags = find('humfrey:theDataHubDatasetTag', all=True)
    groups = find('humfrey:theDataHubDatasetGroup', all=True)

    url = doc_forward(dataset.uri)

    logger.debug("Fetching existing record for %r", package_name)
    try:
        package_entity = client.package_entity_get(package_name)
        logger.debug("Record successfully retrieved")
    except ckanclient.CkanApiNotFoundError:
        package_entity = {'name': package_name}
        client.package_register_post(package_entity)
        logger.debug("No record found; starting from empty")

    original = copy.deepcopy(package_entity)

    package_entity.update({'name': package_name,
                           'title': package_title,
                           'url': url,
                           'notes': description,
                           'license_id': license,
                           'author': author,
                           'maintainer': maintainer,
                           'maintainer_email': dataset.oo_contact.get_one_of('foaf:mbox', 'v:email').replace('mailto:', '', 1)})

    package_entity['groups'] = list(settings.CKAN_GROUPS | set(package_entity.get('groups', ())) | groups)
    package_entity['tags'] = list(settings.CKAN_TAGS | set(package_entity.get('tags', ())) | tags)

    resources = collections.defaultdict(dict, ((r.get('name'), r) for r in package_entity.get('resources', ())))
    resources['SPARQL endpoint'].update({'name': 'SPARQL endpoint',
                                         'format': 'api/sparql',
                                         'url': sparql_endpoint})
    package_entity['resources'] = resources.values()

    logger.debug("Updated CKAN record")

    if original != package_entity:
        logger.info("Updating %r at thedatahub.org", package_name)
        client.package_entity_put(package_entity)
def get(self, uri):
    return Resource(self, uri)