def _iter(self, sparql_results_type, fields, bindings, boolean, triples): queue = Queue.Queue() graph = Graph() for prefix, namespace_uri in NS.iteritems(): graph.namespace_manager.bind(prefix, namespace_uri) triples = list(triples) with statsd.timer('humfrey.streaming.rdflib-serializer.add-triples.' + self.plugin_name): graph += triples serializer_thread = threading.Thread(target=self._serialize_to_queue, args=(graph, queue)) with statsd.timer('humfrey.streaming.rdflib-serializer.serialize.' + self.plugin_name): serializer_thread.start() while True: type, value = queue.get() if type == 'data': yield value elif type == 'sentinel': break elif type == 'exception': raise value[0], value[1], value[2] serializer_thread.join()
def label2(self):
    """
    Return a ``prefix:localpart`` form of this resource's identifier.

    The first namespace in NS whose URI is a prefix of the identifier, and
    whose remaining local part matches LOCALPART_RE, is used. If no
    namespace qualifies the identifier itself is returned.
    """
    identifier = self._identifier
    for prefix, uri in NS.iteritems():
        if not identifier.startswith(uri):
            continue
        localpart = identifier[len(uri):]
        if LOCALPART_RE.match(localpart):
            return '%s:%s' % (prefix, localpart)
    return identifier
def get(self):
    """
    Return an in-memory object representing the stream.

    The object is built on the first call and cached in ``_cached_get``.
    Depending on ``get_sparql_results_type()`` it is a SparqlResultList
    ('resultset'), the value of ``get_boolean()`` ('boolean') or an rdflib
    ConjunctiveGraph ('graph'). Any other type raises AssertionError.

    If this stream has ``query`` or ``duration`` attributes they are copied
    onto the freshly built object.
    """
    if self._cached_get is not None:
        return self._cached_get

    results_type = self.get_sparql_results_type()
    if results_type == 'resultset':
        self._cached_get = SparqlResultList(self.get_fields(),
                                            self.get_bindings())
    elif results_type == 'boolean':
        self._cached_get = self.get_boolean()
    elif results_type == 'graph':
        conjunctive = rdflib.ConjunctiveGraph()
        for prefix, namespace_uri in NS.iteritems():
            conjunctive.namespace_manager.bind(prefix, namespace_uri)
        conjunctive += self.get_triples()
        self._cached_get = conjunctive
    else:
        raise AssertionError(
            "Unexpected results type: {0}".format(results_type))

    # Carry the optional metadata over to the cached object.
    for name in ('query', 'duration'):
        if hasattr(self, name):
            setattr(self._cached_get, name, getattr(self, name))
    return self._cached_get
def execute(self, transform_manager):
    """
    Load every vocabulary listed in NS.

    Each ``(prefix, uri)`` pair from NS is passed, together with
    ``transform_manager``, to ``self.load_vocabulary``. A failure for one
    vocabulary is logged with its traceback and the remaining vocabularies
    are still attempted.
    """
    for prefix, uri in NS.iteritems():
        try:
            self.load_vocabulary(transform_manager, prefix, uri)
        except Exception:
            # Deliberately best-effort: log and carry on with the next
            # vocabulary. logger.exception records the active traceback,
            # so the exception object itself does not need to be bound.
            logger.exception("Failed to load vocabulary: %r from %r", prefix, uri)
def get(self):
    """
    Build (once) and return the in-memory representation of the stream.

    The result depends on ``get_sparql_results_type()``:

    * 'resultset' -- a SparqlResultList made from the fields and bindings
    * 'boolean'   -- whatever ``get_boolean()`` returns
    * 'graph'     -- an rdflib ConjunctiveGraph holding the triples, with
      all NS prefixes bound

    Any other value raises AssertionError. ``query`` and ``duration`` are
    copied from this stream onto the result when the stream has them. The
    result is kept in ``_cached_get`` and returned directly by later calls.
    """
    if self._cached_get is not None:
        return self._cached_get

    kind = self.get_sparql_results_type()
    if kind == 'resultset':
        self._cached_get = SparqlResultList(self.get_fields(),
                                            self.get_bindings())
    elif kind == 'boolean':
        self._cached_get = self.get_boolean()
    elif kind == 'graph':
        result_graph = rdflib.ConjunctiveGraph()
        for prefix, namespace_uri in NS.iteritems():
            result_graph.namespace_manager.bind(prefix, namespace_uri)
        result_graph += self.get_triples()
        self._cached_get = result_graph
    else:
        raise AssertionError("Unexpected results type: {0}".format(kind))

    for attr_name in ('query', 'duration'):
        if hasattr(self, attr_name):
            setattr(self._cached_get, attr_name, getattr(self, attr_name))
    return self._cached_get
def get(self, request):
    """
    Render the description document for the resource behind the request URL.

    The request URL is mapped back to a resource URI with doc_backward().
    A graph describing the resource is then assembled from the queries the
    Resource asks for, annotated with source and license triples, and
    rendered either in the requested format or through the default render
    path.

    Returns an HttpResponsePermanentRedirect when ``check_canonical`` is
    set and the request URL is not the canonical document URL. Otherwise
    returns the result of ``render_to_format()`` or ``render()``.

    Raises Http404 when the URL cannot be resolved to a URI, the URI does
    not match IRI, the resource has no types, the resulting graph is empty,
    or ``render_to_format()`` raises KeyError.
    """
    additional_headers = {}

    # Apache helpfully(!?) unescapes encoded hash characters. If we get one
    # we know that the browser sent a '%23' (or else would have stripped it
    # as a fragment identifier). We replace it with a '%23' so that our URI
    # canonicalisation doesn't get stuck in an endless redirect loop.
    doc_url = request.build_absolute_uri().replace('#', '%23')

    # Given a URL 'http://example.org/doc/foo.bar' we check whether 'foo',
    # has a type (ergo 'bar' is a format), and if not we assume that
    # 'foo.bar' is part of the URI
    for formats in (None, ()):
        uri, format, is_local = doc_backward(doc_url, formats)
        if uri and not IRI.match(uri):
            raise Http404("Invalid IRI")
        if not uri:
            logger.debug("Could not resolve URL to a URI: %r", doc_url)
            raise Http404("Could not resolve URL to a URI")
        types = self.get_types(uri)
        if types:
            break
        doc_url = doc_url.rsplit('.', 1)[0]
    else:
        # Neither attempt produced a resource with a type.
        logger.debug("Resource has no type, so is probably not known in these parts: %r", uri)
        raise Http404("Resource has no type, so is probably not known in these parts")

    expected_doc_url = urlparse.urljoin(doc_url, doc_forward(uri, request, format=format, described=True))
    if self.check_canonical and expected_doc_url != doc_url:
        logger.debug("Request for a non-canonical doc URL (%r) for %r, redirecting to %r", doc_url, uri, expected_doc_url)
        return HttpResponsePermanentRedirect(expected_doc_url)

    doc_uri = rdflib.URIRef(doc_forward(uri, request, format=None, described=True))

    self.context.update({
        'subject_uri': uri,
        'doc_uri': doc_uri,
        'format': format,
        'types': types,
        'show_follow_link': not is_local,
        'no_index': not is_local,
        'additional_headers': additional_headers,
    })

    subject_uri, doc_uri = self.context['subject_uri'], self.context['doc_uri']
    types = self.context['types']
    queries, graph = [], rdflib.ConjunctiveGraph()
    for prefix, namespace_uri in NS.iteritems():
        graph.namespace_manager.bind(prefix, namespace_uri)

    # Seed the graph with the known types before asking the Resource which
    # queries it wants run.
    graph += ((subject_uri, NS.rdf.type, t) for t in types)
    subject = Resource(subject_uri, graph, self.endpoint)

    for query in subject.get_queries():
        graph += self.endpoint.query(query)
        queries.append(query)

    licenses, datasets = set(), set()
    for graph_name in graph.subjects(NS['ov'].describes):
        graph.add((doc_uri, NS['dcterms'].source, graph_name))
        licenses.update(graph.objects(graph_name, NS['dcterms'].license))
        datasets.update(graph.objects(graph_name, NS['void'].inDataset))

    # Only attach a license to the document when it is unambiguous.
    if len(licenses) == 1:
        for license_uri in licenses:
            graph.add((doc_uri, NS['dcterms'].license, license_uri))

    if not graph:
        logger.debug("Graph for %r was empty; 404ing", uri)
        raise Http404("Graph was empty")

    self.template_name = subject.template_name or self.template_name
    for template_override in self.template_overrides:
        # Each override is (template name, type, type, ...).
        tn, override_types = template_override[0], template_override[1:]
        if set(subject._graph.objects(subject._identifier, NS.rdf.type)) & set(map(expand, override_types)):
            self.template_name = tn
            break

    # The comprehension variables are deliberately not called `uri`: under
    # Python 2 a list comprehension rebinds its loop variable in the
    # enclosing function, which would overwrite the resource URI that is
    # still needed for the Content-Location computation below.
    self.context.update({
        'graph': graph,
        'subject': subject,
        'licenses': [Resource(license_uri, graph, self.endpoint) for license_uri in licenses],
        'datasets': [Resource(dataset_uri, graph, self.endpoint) for dataset_uri in datasets],
        'queries': map(self.endpoint.normalize_query, queries),
        'template_name': self.template_name,
    })

    self.set_renderers()

    for doc_rdf_processor in self._doc_rdf_processors:
        additional_context = doc_rdf_processor(self.request, self.context)
        if additional_context:
            self.context.update(additional_context)

    # If no format was given explicitly (i.e. format parameter or
    # extension) we inspect the Content-Type header.
    if not format:
        if request.renderers:
            format = request.renderers[0].format
            expected_doc_url = doc_forward(uri, request, format=format, described=True)
    if expected_doc_url != doc_url:
        additional_headers['Content-Location'] = expected_doc_url

    # NOTE: This getattrs every attr on subject, so would force
    # memoization on any cached attributes. We call it as late as
    # possible to make sure the graph won't change afterwards, making
    # those cached results incorrect.
    self.conneg += subject

    if self.context['format']:
        try:
            return self.render_to_format(format=format)
        except KeyError:
            raise Http404
    else:
        return self.render()
def get(self, request):
    """
    Render the description document for the resource behind the request URL.

    The request URL is mapped back to a resource URI with doc_backward(),
    using the formats known to ``self._renderers_by_format``. A graph
    describing the resource is assembled from the queries the Resource asks
    for, annotated with source and license triples, passed through the
    configured doc RDF processors and finally rendered.

    Returns an HttpResponsePermanentRedirect when ``check_canonical`` is
    set and the request URL is not the canonical document URL. Otherwise
    returns the result of ``render_to_format()`` or ``render()``.

    Raises Http404 when the URL cannot be resolved to a URI, the resource
    has no types, the resulting graph is empty, or ``render_to_format()``
    raises KeyError.
    """
    additional_headers = {}
    doc_url = request.build_absolute_uri()

    uri, format, is_local = doc_backward(doc_url, set(self._renderers_by_format))
    if not uri:
        logger.debug("Could not resolve URL to a URI: %r", doc_url)
        raise Http404

    expected_doc_url = doc_forward(uri, request, format=format, described=True)

    types = self.get_types(uri)
    if not types:
        logger.debug("Resource has no type, so is probably not known in these parts: %r", uri)
        raise Http404

    if self.check_canonical and expected_doc_url != doc_url:
        logger.debug("Request for a non-canonical doc URL (%r) for %r, redirecting to %r", doc_url, uri, expected_doc_url)
        return HttpResponsePermanentRedirect(expected_doc_url)

    # If no format was given explicitly (i.e. format parameter or
    # extension) we inspect the Content-Type header.
    if not format:
        renderers = self.get_renderers(request)
        if renderers:
            format = renderers[0].format
            expected_doc_url = doc_forward(uri, request, format=format, described=True)
    if expected_doc_url != doc_url:
        additional_headers['Content-Location'] = expected_doc_url

    doc_uri = rdflib.URIRef(doc_forward(uri, request, format=None, described=True))

    context = {
        'subject_uri': uri,
        'doc_uri': doc_uri,
        'format': format,
        'types': types,
        'show_follow_link': not is_local,
        'no_index': not is_local,
        'additional_headers': additional_headers,
    }

    subject_uri, doc_uri = context['subject_uri'], context['doc_uri']
    types = context['types']
    queries, graph = [], rdflib.ConjunctiveGraph()
    for prefix, namespace_uri in NS.iteritems():
        graph.namespace_manager.bind(prefix, namespace_uri)

    # Seed the graph with the known types before asking the Resource which
    # queries it wants run.
    graph += ((subject_uri, NS.rdf.type, t) for t in types)
    subject = Resource(subject_uri, graph, self.endpoint)

    for query in subject.get_queries():
        graph += self.endpoint.query(query)
        queries.append(query)

    licenses, datasets = set(), set()
    for graph_name in graph.subjects(NS['ov'].describes):
        graph.add((doc_uri, NS['dcterms'].source, graph_name))
        licenses.update(graph.objects(graph_name, NS['dcterms'].license))
        datasets.update(graph.objects(graph_name, NS['void'].inDataset))

    # Only attach a license to the document when it is unambiguous.
    if len(licenses) == 1:
        for license_uri in licenses:
            graph.add((doc_uri, NS['dcterms'].license, license_uri))

    if not graph:
        logger.debug("Graph for %r was empty; 404ing", uri)
        raise Http404

    for doc_rdf_processor in self._doc_rdf_processors:
        additional_context = doc_rdf_processor(request=request,
                                               graph=graph,
                                               doc_uri=doc_uri,
                                               subject_uri=subject_uri,
                                               subject=subject,
                                               endpoint=self.endpoint,
                                               renderers=self._renderers)
        if additional_context:
            context.update(additional_context)

    # The comprehension variables are deliberately not called `uri`: under
    # Python 2 a list comprehension rebinds its loop variable in the
    # enclosing function, which would silently overwrite the resource URI.
    context.update({
        'graph': graph,
        'subject': subject,
        'licenses': [Resource(license_uri, graph, self.endpoint) for license_uri in licenses],
        'datasets': [Resource(dataset_uri, graph, self.endpoint) for dataset_uri in datasets],
        'queries': queries,
        'template_name': subject.template_name,
    })

    template_name = subject.template_name or self.template_name
    for template_override in self.template_overrides:
        # Each override is (template name, type, type, ...).
        tn, override_types = template_override[0], template_override[1:]
        # Diagnostic output goes through the logger rather than stdout.
        logger.debug("Considering template override %r for types %r; subject types: %r",
                     tn, override_types, subject.get_all('rdf:type'))
        if set(subject._graph.objects(subject._identifier, NS.rdf.type)) & set(map(expand, override_types)):
            template_name = tn
            break

    if context['format']:
        try:
            return self.render_to_format(request, context, template_name, format)
        except KeyError:
            raise Http404
    else:
        return self.render(request, context, template_name)
def get(self, request):
    """
    Render the description document for the resource behind the request URL.

    The request URL is mapped back to a resource URI with doc_backward().
    A graph describing the resource is then assembled from the queries the
    Resource asks for, annotated with source and license triples, and
    rendered either in the requested format or through the default render
    path.

    Returns an HttpResponsePermanentRedirect when ``check_canonical`` is
    set and the request URL is not the canonical document URL. Otherwise
    returns the result of ``render_to_format()`` or ``render()``.

    Raises Http404 when the URL cannot be resolved to a URI, the URI does
    not match IRI, the resource has no types, the resulting graph is empty,
    or ``render_to_format()`` raises KeyError.
    """
    additional_headers = {}

    # Apache helpfully(!?) unescapes encoded hash characters. If we get one
    # we know that the browser sent a '%23' (or else would have stripped it
    # as a fragment identifier). We replace it with a '%23' so that our URI
    # canonicalisation doesn't get stuck in an endless redirect loop.
    doc_url = request.build_absolute_uri().replace('#', '%23')

    # Given a URL 'http://example.org/doc/foo.bar' we check whether 'foo',
    # has a type (ergo 'bar' is a format), and if not we assume that
    # 'foo.bar' is part of the URI
    for formats in (None, ()):
        uri, format, is_local = doc_backward(doc_url, formats)
        if uri and not IRI.match(uri):
            raise Http404("Invalid IRI")
        if not uri:
            logger.debug("Could not resolve URL to a URI: %r", doc_url)
            raise Http404("Could not resolve URL to a URI")
        types = self.get_types(uri)
        if types:
            break
        doc_url = doc_url.rsplit('.', 1)[0]
    else:
        # Neither attempt produced a resource with a type.
        logger.debug(
            "Resource has no type, so is probably not known in these parts: %r",
            uri)
        raise Http404(
            "Resource has no type, so is probably not known in these parts"
        )

    expected_doc_url = urlparse.urljoin(
        doc_url, doc_forward(uri, request, format=format, described=True))
    if self.check_canonical and expected_doc_url != doc_url:
        logger.debug(
            "Request for a non-canonical doc URL (%r) for %r, redirecting to %r",
            doc_url, uri, expected_doc_url)
        return HttpResponsePermanentRedirect(expected_doc_url)

    doc_uri = rdflib.URIRef(
        doc_forward(uri, request, format=None, described=True))

    self.context.update({
        'subject_uri': uri,
        'doc_uri': doc_uri,
        'format': format,
        'types': types,
        'show_follow_link': not is_local,
        'no_index': not is_local,
        'additional_headers': additional_headers,
    })

    subject_uri, doc_uri = self.context['subject_uri'], self.context[
        'doc_uri']
    types = self.context['types']
    queries, graph = [], rdflib.ConjunctiveGraph()
    for prefix, namespace_uri in NS.iteritems():
        graph.namespace_manager.bind(prefix, namespace_uri)

    # Seed the graph with the known types before asking the Resource which
    # queries it wants run.
    graph += ((subject_uri, NS.rdf.type, t) for t in types)
    subject = Resource(subject_uri, graph, self.endpoint)

    for query in subject.get_queries():
        graph += self.endpoint.query(query)
        queries.append(query)

    licenses, datasets = set(), set()
    for graph_name in graph.subjects(NS['ov'].describes):
        graph.add((doc_uri, NS['dcterms'].source, graph_name))
        licenses.update(graph.objects(graph_name, NS['dcterms'].license))
        datasets.update(graph.objects(graph_name, NS['void'].inDataset))

    # Only attach a license to the document when it is unambiguous.
    if len(licenses) == 1:
        for license_uri in licenses:
            graph.add((doc_uri, NS['dcterms'].license, license_uri))

    if not graph:
        logger.debug("Graph for %r was empty; 404ing", uri)
        raise Http404("Graph was empty")

    self.template_name = subject.template_name or self.template_name
    for template_override in self.template_overrides:
        # Each override is (template name, type, type, ...).
        tn, override_types = template_override[0], template_override[1:]
        if set(subject._graph.objects(
                subject._identifier, NS.rdf.type)) & set(map(
                    expand, override_types)):
            self.template_name = tn
            break

    # The comprehension variables are deliberately not called `uri`: under
    # Python 2 a list comprehension rebinds its loop variable in the
    # enclosing function, which would overwrite the resource URI that is
    # still needed for the Content-Location computation below.
    self.context.update({
        'graph': graph,
        'subject': subject,
        'licenses': [Resource(license_uri, graph, self.endpoint)
                     for license_uri in licenses],
        'datasets': [Resource(dataset_uri, graph, self.endpoint)
                     for dataset_uri in datasets],
        'queries': map(self.endpoint.normalize_query, queries),
        'template_name': self.template_name,
    })

    self.set_renderers()

    for doc_rdf_processor in self._doc_rdf_processors:
        additional_context = doc_rdf_processor(self.request, self.context)
        if additional_context:
            self.context.update(additional_context)

    # If no format was given explicitly (i.e. format parameter or
    # extension) we inspect the Content-Type header.
    if not format:
        if request.renderers:
            format = request.renderers[0].format
            expected_doc_url = doc_forward(uri,
                                           request,
                                           format=format,
                                           described=True)
    if expected_doc_url != doc_url:
        additional_headers['Content-Location'] = expected_doc_url

    # NOTE: This getattrs every attr on subject, so would force
    # memoization on any cached attributes. We call it as late as
    # possible to make sure the graph won't change afterwards, making
    # those cached results incorrect.
    self.conneg += subject

    if self.context['format']:
        try:
            return self.render_to_format(format=format)
        except KeyError:
            raise Http404
    else:
        return self.render()