Exemplo n.º 1
0
    def _iter(self, sparql_results_type, fields, bindings, boolean, triples):
        queue = Queue.Queue()
        graph = Graph()
        for prefix, namespace_uri in NS.iteritems():
            graph.namespace_manager.bind(prefix, namespace_uri)

        triples = list(triples)
        with statsd.timer('humfrey.streaming.rdflib-serializer.add-triples.' +
                          self.plugin_name):
            graph += triples
        serializer_thread = threading.Thread(target=self._serialize_to_queue,
                                             args=(graph, queue))

        with statsd.timer('humfrey.streaming.rdflib-serializer.serialize.' +
                          self.plugin_name):
            serializer_thread.start()
            while True:
                type, value = queue.get()
                if type == 'data':
                    yield value
                elif type == 'sentinel':
                    break
                elif type == 'exception':
                    raise value[0], value[1], value[2]
            serializer_thread.join()
Exemplo n.º 2
0
 def label2(self):
     """
     Return a ``prefix:localpart`` form of this resource's identifier.

     Each (prefix, uri) pair in ``NS`` is tried in iteration order; the
     first one whose URI starts the identifier and whose remainder matches
     ``LOCALPART_RE`` wins. If none qualifies, the identifier is returned
     unchanged.
     """
     for prefix, uri in NS.iteritems():
         if not self._identifier.startswith(uri):
             continue
         remainder = self._identifier[len(uri):]
         if not LOCALPART_RE.match(remainder):
             continue
         return '%s:%s' % (prefix, remainder)
     return self._identifier
Exemplo n.º 3
0
    def get(self):
        """
        Return an in-memory object representing the stream, building it once.

        Depending on ``get_sparql_results_type()`` the result is a
        SparqlResultList ('resultset'), whatever ``get_boolean()`` returns
        ('boolean'), or an rdflib ConjunctiveGraph with the ``NS`` prefixes
        bound ('graph'). Any other type raises AssertionError.

        If this object has ``query`` or ``duration`` attributes they are
        copied onto the result. The result is cached in ``_cached_get`` and
        returned as-is on later calls.
        """
        if self._cached_get is not None:
            return self._cached_get

        results_type = self.get_sparql_results_type()
        if results_type == 'resultset':
            self._cached_get = SparqlResultList(self.get_fields(),
                                                self.get_bindings())
        elif results_type == 'boolean':
            self._cached_get = self.get_boolean()
        elif results_type == 'graph':
            built = rdflib.ConjunctiveGraph()
            for ns_prefix, ns_uri in NS.iteritems():
                built.namespace_manager.bind(ns_prefix, ns_uri)
            built += self.get_triples()
            self._cached_get = built
        else:
            message = "Unexpected results type: {0}".format(results_type)
            raise AssertionError(message)

        # Carry request metadata over to the result when this object has it.
        for name in ('query', 'duration'):
            if not hasattr(self, name):
                continue
            setattr(self._cached_get, name, getattr(self, name))
        return self._cached_get
Exemplo n.º 4
0
 def label2(self):
     """
     Abbreviate the identifier to ``prefix:localpart`` where possible.

     Walks the (prefix, uri) pairs of ``NS`` and returns the abbreviation
     for the first namespace URI that the identifier starts with, provided
     the part after the URI matches ``LOCALPART_RE``. Falls back to the
     full identifier when no namespace yields an acceptable local part.
     """
     for prefix, uri in NS.iteritems():
         matches_namespace = self._identifier.startswith(uri)
         if matches_namespace:
             localpart = self._identifier[len(uri):]
             acceptable = LOCALPART_RE.match(localpart)
             if acceptable:
                 return '%s:%s' % (prefix, localpart)
     return self._identifier
Exemplo n.º 5
0
    def execute(self, transform_manager):
        """
        Load the vocabulary for every (prefix, uri) pair in ``NS``.

        Loading is best-effort: a failure for one vocabulary is logged with
        its traceback and the remaining vocabularies are still attempted.

        :param transform_manager: passed through unchanged to
            ``self.load_vocabulary``.
        """
        for prefix, uri in NS.iteritems():
            try:
                self.load_vocabulary(transform_manager, prefix, uri)
            except Exception:
                # Deliberately broad: one broken vocabulary must not stop the
                # rest from loading. logger.exception records the traceback,
                # so the exception object does not need to be bound.
                logger.exception("Failed to load vocabulary: %r from %r", prefix, uri)
Exemplo n.º 6
0
    def execute(self, transform_manager):
        """
        Attempt to load each vocabulary listed in ``NS``.

        Every (prefix, uri) pair is passed to ``self.load_vocabulary`` along
        with ``transform_manager``. A failure is logged (with traceback) and
        does not prevent the remaining vocabularies from being tried.

        :param transform_manager: forwarded as-is to ``load_vocabulary``.
        """
        for prefix, uri in NS.iteritems():
            try:
                self.load_vocabulary(transform_manager, prefix, uri)
            except Exception:
                # Best-effort by design; the traceback is captured by
                # logger.exception, so no exception variable is bound.
                logger.exception("Failed to load vocabulary: %r from %r",
                                 prefix, uri)
Exemplo n.º 7
0
    def _iter(self, sparql_results_type, fields, bindings, boolean, triples):
        queue = Queue.Queue()
        graph = Graph()
        for prefix, namespace_uri in NS.iteritems():
            graph.namespace_manager.bind(prefix, namespace_uri)

        triples = list(triples)
        with statsd.timer('humfrey.streaming.rdflib-serializer.add-triples.' + self.plugin_name):
            graph += triples
        serializer_thread = threading.Thread(target=self._serialize_to_queue,
                                             args=(graph, queue))

        with statsd.timer('humfrey.streaming.rdflib-serializer.serialize.' + self.plugin_name):
            serializer_thread.start()
            while True:
                type, value = queue.get()
                if type == 'data':
                    yield value
                elif type == 'sentinel':
                    break
                elif type == 'exception':
                    raise value[0], value[1], value[2]
            serializer_thread.join()
Exemplo n.º 8
0
    def get(self):
        """
        Build (once) and return an in-memory object for this stream.

        The kind of object follows ``get_sparql_results_type()``:
        'resultset' gives a SparqlResultList, 'boolean' gives the value of
        ``get_boolean()``, and 'graph' gives an rdflib ConjunctiveGraph
        holding the stream's triples with the ``NS`` prefixes bound. An
        unrecognised type raises AssertionError.

        ``query`` and ``duration`` are copied from this object to the result
        when present. Later calls return the cached object.
        """
        cached = self._cached_get
        if cached is not None:
            return cached

        kind = self.get_sparql_results_type()
        if kind == 'resultset':
            fields = self.get_fields()
            bindings = self.get_bindings()
            self._cached_get = SparqlResultList(fields, bindings)
        elif kind == 'boolean':
            self._cached_get = self.get_boolean()
        elif kind == 'graph':
            graph = rdflib.ConjunctiveGraph()
            for prefix, namespace_uri in NS.iteritems():
                graph.namespace_manager.bind(prefix, namespace_uri)
            graph += self.get_triples()
            self._cached_get = graph
        else:
            raise AssertionError("Unexpected results type: {0}".format(kind))

        # Copy over whichever of these metadata attributes this object has.
        for attribute in ('query', 'duration'):
            if hasattr(self, attribute):
                setattr(self._cached_get, attribute, getattr(self, attribute))
        return self._cached_get
Exemplo n.º 9
0
    def get(self, request):
        """
        Serve the description document for the resource named by the URL.

        The request URL is resolved to a subject URI, the subject's types are
        looked up, and a graph describing the subject is assembled from the
        queries the subject supplies. The graph and related values are placed
        in ``self.context`` and the response is rendered.

        Returns an HttpResponsePermanentRedirect when ``self.check_canonical``
        is set and the request URL is not the canonical document URL;
        otherwise the result of ``self.render_to_format()`` or
        ``self.render()``.

        Raises Http404 when the URL cannot be resolved to a URI, the URI does
        not match ``IRI``, the resource has no types, the assembled graph is
        empty, or ``render_to_format`` raises KeyError.
        """
        # Shared by reference with self.context below, so the
        # Content-Location header added later is visible through the context.
        additional_headers = {}

        # Apache helpfully(!?) unescapes encoded hash characters. If we get one
        # we know that the browser sent a '%23' (or else would have stripped it
        # as a fragment identifier). We replace it with a '%23' so that our URI
        # canonicalisation doesn't get stuck in an endless redirect loop.
        doc_url = request.build_absolute_uri().replace('#', '%23')

        # Given a URL 'http://example.org/doc/foo.bar' we check whether 'foo',
        # has a type (ergo 'bar' is a format), and if not we assume that
        # 'foo.bar' is part of the URI
        # NOTE(review): presumably formats=None lets doc_backward recognise
        # any format extension and formats=() none -- confirm against
        # doc_backward.
        for formats in (None, ()):
            uri, format, is_local = doc_backward(doc_url, formats)
            if uri and not IRI.match(uri):
                raise Http404("Invalid IRI")
            if not uri:
                logger.debug("Could not resolve URL to a URI: %r", doc_url)
                raise Http404("Could not resolve URL to a URI")
            types = self.get_types(uri)
            if types:
                break
            doc_url = doc_url.rsplit('.', 1)[0]
        else:
            # Reached only when neither attempt found any types.
            logger.debug("Resource has no type, so is probably not known in these parts: %r", uri)
            raise Http404("Resource has no type, so is probably not known in these parts")

        expected_doc_url = urlparse.urljoin(doc_url, doc_forward(uri, request, format=format, described=True))
        if self.check_canonical and expected_doc_url != doc_url:
            logger.debug("Request for a non-canonical doc URL (%r) for %r, redirecting to %r", doc_url, uri, expected_doc_url)
            return HttpResponsePermanentRedirect(expected_doc_url)

        # The document's own URI, built without a format.
        doc_uri = rdflib.URIRef(doc_forward(uri, request, format=None, described=True))

        self.context.update({
            'subject_uri': uri,
            'doc_uri': doc_uri,
            'format': format,
            'types': types,
            'show_follow_link': not is_local,
            'no_index': not is_local,
            'additional_headers': additional_headers,
        })

        subject_uri, doc_uri = self.context['subject_uri'], self.context['doc_uri']
        types = self.context['types']

        queries, graph = [], rdflib.ConjunctiveGraph()
        for prefix, namespace_uri in NS.iteritems():
            graph.namespace_manager.bind(prefix, namespace_uri)

        # Seed the graph with the already-known types before creating the
        # Resource over that graph and asking it for its queries.
        graph += ((subject_uri, NS.rdf.type, t) for t in types)
        subject = Resource(subject_uri, graph, self.endpoint)

        for query in subject.get_queries():
            graph += self.endpoint.query(query)
            queries.append(query)

        # Record each graph that describes something as a source of this
        # document, collecting their licences and datasets along the way.
        licenses, datasets = set(), set()
        for graph_name in graph.subjects(NS['ov'].describes):
            graph.add((doc_uri, NS['dcterms'].source, graph_name))
            licenses.update(graph.objects(graph_name, NS['dcterms'].license))
            datasets.update(graph.objects(graph_name, NS['void'].inDataset))

        # Only attach a licence to the document itself when the sources
        # yield exactly one.
        if len(licenses) == 1:
            for license_uri in licenses:
                graph.add((doc_uri, NS['dcterms'].license, license_uri))

        if not graph:
            logger.debug("Graph for %r was empty; 404ing", uri)
            raise Http404("Graph was empty")

        # Template precedence: the first matching override, else the
        # subject's own template, else the view's existing template_name.
        self.template_name = subject.template_name or self.template_name
        for template_override in self.template_overrides:
            # NOTE: this rebinds the local `types`; context['types'] is
            # unaffected because it was stored above.
            tn, types = template_override[0], template_override[1:]
            if set(subject._graph.objects(subject._identifier, NS.rdf.type)) & set(map(expand, types)):
                self.template_name = tn
                break

        self.context.update({
            'graph': graph,
            'subject': subject,
            'licenses': [Resource(uri, graph, self.endpoint) for uri in licenses],
            'datasets': [Resource(uri, graph, self.endpoint) for uri in datasets],
            'queries': map(self.endpoint.normalize_query, queries),
            'template_name': self.template_name,
        })

        self.set_renderers()

        # Each processor may return a dict of extra context to merge in.
        for doc_rdf_processor in self._doc_rdf_processors:
            additional_context = doc_rdf_processor(self.request, self.context)
            if additional_context:
                self.context.update(additional_context)

        # If no format was given explicitly (i.e. format parameter or
        # extension) we inspect the Content-Type header.
        if not format:
            if request.renderers:
                format = request.renderers[0].format
                expected_doc_url = doc_forward(uri, request, format=format, described=True)
        if expected_doc_url != doc_url:
            additional_headers['Content-Location'] = expected_doc_url

        # NOTE: This getattrs every attr on subject, so would force
        # memoization on any cached attributes. We call it as late as
        # possible to make sure the graph won't change afterwards, making
        # those cached results incorrect.
        self.conneg += subject

        # context['format'] was stored before the renderer-based fallback
        # above, so (unless a doc_rdf_processor replaced it) this branch is
        # taken only when the request itself named a format.
        if self.context['format']:
            try:
                return self.render_to_format(format=format)
            except KeyError:
                raise Http404
        else:
            return self.render()
Exemplo n.º 10
0
    def get(self, request):
        """
        Serve the description document for the resource named by the URL.

        The request URL is resolved to a subject URI, the subject's types are
        looked up, a graph describing the subject is assembled from the
        queries the subject supplies, and the result is rendered with the
        selected template.

        Returns an HttpResponsePermanentRedirect when ``self.check_canonical``
        is set and the request URL is not the canonical document URL;
        otherwise the result of ``self.render_to_format(...)`` or
        ``self.render(...)``.

        Raises Http404 when the URL cannot be resolved to a URI, the resource
        has no types, the assembled graph is empty, or ``render_to_format``
        raises KeyError.
        """
        # Shared by reference with `context` below.
        additional_headers = {}
        doc_url = request.build_absolute_uri()

        uri, format, is_local = doc_backward(doc_url, set(self._renderers_by_format))
        if not uri:
            logger.debug("Could not resolve URL to a URI: %r", doc_url)
            raise Http404

        expected_doc_url = doc_forward(uri, request, format=format, described=True)

        types = self.get_types(uri)
        if not types:
            logger.debug("Resource has no type, so is probably not known in these parts: %r", uri)
            raise Http404

        if self.check_canonical and expected_doc_url != doc_url:
            logger.debug("Request for a non-canonical doc URL (%r) for %r, redirecting to %r", doc_url, uri, expected_doc_url)
            return HttpResponsePermanentRedirect(expected_doc_url)

        # If no format was given explicitly (i.e. format parameter or
        # extension) we inspect the Content-Type header.
        if not format:
            renderers = self.get_renderers(request)
            if renderers:
                format = renderers[0].format
                expected_doc_url = doc_forward(uri, request, format=format, described=True)
        if expected_doc_url != doc_url:
            additional_headers['Content-Location'] = expected_doc_url

        # The document's own URI, built without a format.
        doc_uri = rdflib.URIRef(doc_forward(uri, request, format=None, described=True))

        context = {
            'subject_uri': uri,
            'doc_uri': doc_uri,
            'format': format,
            'types': types,
            'show_follow_link': not is_local,
            'no_index': not is_local,
            'additional_headers': additional_headers,
        }

        subject_uri, doc_uri = context['subject_uri'], context['doc_uri']
        types = context['types']

        queries, graph = [], rdflib.ConjunctiveGraph()
        for prefix, namespace_uri in NS.iteritems():
            graph.namespace_manager.bind(prefix, namespace_uri)

        # Seed the graph with the already-known types before creating the
        # Resource over that graph and asking it for its queries.
        graph += ((subject_uri, NS.rdf.type, t) for t in types)
        subject = Resource(subject_uri, graph, self.endpoint)

        for query in subject.get_queries():
            graph += self.endpoint.query(query)
            queries.append(query)

        # Record each graph that describes something as a source of this
        # document, collecting their licences and datasets along the way.
        licenses, datasets = set(), set()
        for graph_name in graph.subjects(NS['ov'].describes):
            graph.add((doc_uri, NS['dcterms'].source, graph_name))
            licenses.update(graph.objects(graph_name, NS['dcterms'].license))
            datasets.update(graph.objects(graph_name, NS['void'].inDataset))

        # Only attach a licence to the document itself when the sources
        # yield exactly one.
        if len(licenses) == 1:
            for license_uri in licenses:
                graph.add((doc_uri, NS['dcterms'].license, license_uri))

        if not graph:
            logger.debug("Graph for %r was empty; 404ing", uri)
            raise Http404

        # Each processor may return a dict of extra context to merge in.
        for doc_rdf_processor in self._doc_rdf_processors:
            additional_context = doc_rdf_processor(request=request,
                                                   graph=graph,
                                                   doc_uri=doc_uri,
                                                   subject_uri=subject_uri,
                                                   subject=subject,
                                                   endpoint=self.endpoint,
                                                   renderers=self._renderers)
            if additional_context:
                context.update(additional_context)

        context.update({
            'graph': graph,
            'subject': subject,
            'licenses': [Resource(uri, graph, self.endpoint) for uri in licenses],
            'datasets': [Resource(uri, graph, self.endpoint) for uri in datasets],
            'queries': queries,
            'template_name': subject.template_name,
        })

        # Template precedence: the first matching override, else the
        # subject's own template, else the view's default template_name.
        template_name = subject.template_name or self.template_name
        for template_override in self.template_overrides:
            tn, override_types = template_override[0], template_override[1:]
            # Diagnostic output goes through the logger rather than a print
            # statement, so nothing is written to stdout while serving a
            # request.
            logger.debug("Considering template override %r for types %r; subject types: %r",
                         tn, override_types, subject.get_all('rdf:type'))
            if set(subject._graph.objects(subject._identifier, NS.rdf.type)) & set(map(expand, override_types)):
                template_name = tn
                break

        if context['format']:
            try:
                return self.render_to_format(request, context, template_name, format)
            except KeyError:
                raise Http404
        else:
            return self.render(request, context, template_name)
Exemplo n.º 11
0
    def get(self, request):
        """
        Serve the description document for the resource named by the URL.

        The request URL is resolved to a subject URI, the subject's types are
        looked up, and a graph describing the subject is assembled from the
        queries the subject supplies. The graph and related values are placed
        in ``self.context`` and the response is rendered.

        Returns an HttpResponsePermanentRedirect when ``self.check_canonical``
        is set and the request URL is not the canonical document URL;
        otherwise the result of ``self.render_to_format()`` or
        ``self.render()``.

        Raises Http404 when the URL cannot be resolved to a URI, the URI does
        not match ``IRI``, the resource has no types, the assembled graph is
        empty, or ``render_to_format`` raises KeyError.
        """
        # Shared by reference with self.context below, so the
        # Content-Location header added later is visible through the context.
        additional_headers = {}

        # Apache helpfully(!?) unescapes encoded hash characters. If we get one
        # we know that the browser sent a '%23' (or else would have stripped it
        # as a fragment identifier). We replace it with a '%23' so that our URI
        # canonicalisation doesn't get stuck in an endless redirect loop.
        doc_url = request.build_absolute_uri().replace('#', '%23')

        # Given a URL 'http://example.org/doc/foo.bar' we check whether 'foo',
        # has a type (ergo 'bar' is a format), and if not we assume that
        # 'foo.bar' is part of the URI
        # NOTE(review): presumably formats=None lets doc_backward recognise
        # any format extension and formats=() none -- confirm against
        # doc_backward.
        for formats in (None, ()):
            uri, format, is_local = doc_backward(doc_url, formats)
            if uri and not IRI.match(uri):
                raise Http404("Invalid IRI")
            if not uri:
                logger.debug("Could not resolve URL to a URI: %r", doc_url)
                raise Http404("Could not resolve URL to a URI")
            types = self.get_types(uri)
            if types:
                break
            doc_url = doc_url.rsplit('.', 1)[0]
        else:
            # Reached only when neither attempt found any types.
            logger.debug(
                "Resource has no type, so is probably not known in these parts: %r",
                uri)
            raise Http404(
                "Resource has no type, so is probably not known in these parts"
            )

        expected_doc_url = urlparse.urljoin(
            doc_url, doc_forward(uri, request, format=format, described=True))
        if self.check_canonical and expected_doc_url != doc_url:
            logger.debug(
                "Request for a non-canonical doc URL (%r) for %r, redirecting to %r",
                doc_url, uri, expected_doc_url)
            return HttpResponsePermanentRedirect(expected_doc_url)

        # The document's own URI, built without a format.
        doc_uri = rdflib.URIRef(
            doc_forward(uri, request, format=None, described=True))

        self.context.update({
            'subject_uri': uri,
            'doc_uri': doc_uri,
            'format': format,
            'types': types,
            'show_follow_link': not is_local,
            'no_index': not is_local,
            'additional_headers': additional_headers,
        })

        subject_uri, doc_uri = self.context['subject_uri'], self.context[
            'doc_uri']
        types = self.context['types']

        queries, graph = [], rdflib.ConjunctiveGraph()
        for prefix, namespace_uri in NS.iteritems():
            graph.namespace_manager.bind(prefix, namespace_uri)

        # Seed the graph with the already-known types before creating the
        # Resource over that graph and asking it for its queries.
        graph += ((subject_uri, NS.rdf.type, t) for t in types)
        subject = Resource(subject_uri, graph, self.endpoint)

        for query in subject.get_queries():
            graph += self.endpoint.query(query)
            queries.append(query)

        # Record each graph that describes something as a source of this
        # document, collecting their licences and datasets along the way.
        licenses, datasets = set(), set()
        for graph_name in graph.subjects(NS['ov'].describes):
            graph.add((doc_uri, NS['dcterms'].source, graph_name))
            licenses.update(graph.objects(graph_name, NS['dcterms'].license))
            datasets.update(graph.objects(graph_name, NS['void'].inDataset))

        # Only attach a licence to the document itself when the sources
        # yield exactly one.
        if len(licenses) == 1:
            for license_uri in licenses:
                graph.add((doc_uri, NS['dcterms'].license, license_uri))

        if not graph:
            logger.debug("Graph for %r was empty; 404ing", uri)
            raise Http404("Graph was empty")

        # Template precedence: the first matching override, else the
        # subject's own template, else the view's existing template_name.
        self.template_name = subject.template_name or self.template_name
        for template_override in self.template_overrides:
            # NOTE: this rebinds the local `types`; context['types'] is
            # unaffected because it was stored above.
            tn, types = template_override[0], template_override[1:]
            if set(subject._graph.objects(
                    subject._identifier, NS.rdf.type)) & set(map(
                        expand, types)):
                self.template_name = tn
                break

        self.context.update({
            'graph':
            graph,
            'subject':
            subject,
            'licenses':
            [Resource(uri, graph, self.endpoint) for uri in licenses],
            'datasets':
            [Resource(uri, graph, self.endpoint) for uri in datasets],
            'queries':
            map(self.endpoint.normalize_query, queries),
            'template_name':
            self.template_name,
        })

        self.set_renderers()

        # Each processor may return a dict of extra context to merge in.
        for doc_rdf_processor in self._doc_rdf_processors:
            additional_context = doc_rdf_processor(self.request, self.context)
            if additional_context:
                self.context.update(additional_context)

        # If no format was given explicitly (i.e. format parameter or
        # extension) we inspect the Content-Type header.
        if not format:
            if request.renderers:
                format = request.renderers[0].format
                expected_doc_url = doc_forward(uri,
                                               request,
                                               format=format,
                                               described=True)
        if expected_doc_url != doc_url:
            additional_headers['Content-Location'] = expected_doc_url

        # NOTE: This getattrs every attr on subject, so would force
        # memoization on any cached attributes. We call it as late as
        # possible to make sure the graph won't change afterwards, making
        # those cached results incorrect.
        self.conneg += subject

        # context['format'] was stored before the renderer-based fallback
        # above, so (unless a doc_rdf_processor replaced it) this branch is
        # taken only when the request itself named a format.
        if self.context['format']:
            try:
                return self.render_to_format(format=format)
            except KeyError:
                raise Http404
        else:
            return self.render()