Пример #1
0
    def _iter(self, sparql_results_type, fields, bindings, boolean, triples):
        queue = Queue.Queue()
        graph = Graph()
        for prefix, namespace_uri in NS.iteritems():
            graph.namespace_manager.bind(prefix, namespace_uri)

        triples = list(triples)
        with statsd.timer('humfrey.streaming.rdflib-serializer.add-triples.' +
                          self.plugin_name):
            graph += triples
        serializer_thread = threading.Thread(target=self._serialize_to_queue,
                                             args=(graph, queue))

        with statsd.timer('humfrey.streaming.rdflib-serializer.serialize.' +
                          self.plugin_name):
            serializer_thread.start()
            while True:
                type, value = queue.get()
                if type == 'data':
                    yield value
                elif type == 'sentinel':
                    break
                elif type == 'exception':
                    raise value[0], value[1], value[2]
            serializer_thread.join()
Пример #2
0
    def get(self):
        """
        Build (once) and return an in-memory object representing the stream.

        Depending on the SPARQL results type the result is a SparqlResultList,
        the value returned by ``get_boolean()``, or a ConjunctiveGraph with the
        NS prefixes bound. The object is cached on the instance, so subsequent
        calls return the same object. If this stream has ``query`` or
        ``duration`` attributes they are copied onto the result.

        Raises AssertionError for an unrecognised results type.
        """
        if self._cached_get is not None:
            return self._cached_get

        results_type = self.get_sparql_results_type()
        if results_type == 'resultset':
            result = SparqlResultList(self.get_fields(), self.get_bindings())
        elif results_type == 'boolean':
            result = self.get_boolean()
        elif results_type == 'graph':
            result = rdflib.ConjunctiveGraph()
            for prefix, namespace_uri in NS.iteritems():
                result.namespace_manager.bind(prefix, namespace_uri)
            result += self.get_triples()
        else:
            message = "Unexpected results type: {0}".format(results_type)
            raise AssertionError(message)

        # Cache before annotating, so the cache is populated even if copying
        # an attribute onto the result fails.
        self._cached_get = result
        for name in ('query', 'duration'):
            if hasattr(self, name):
                setattr(self._cached_get, name, getattr(self, name))
        return self._cached_get
Пример #3
0
 def label2(self):
     """
     Return a ``prefix:localpart`` form of this resource's identifier.

     The first NS namespace whose URI is a prefix of the identifier, and
     whose remaining local part matches LOCALPART_RE, is used. If no
     namespace qualifies the identifier is returned unchanged.
     """
     identifier = self._identifier
     for prefix, uri in NS.iteritems():
         if not identifier.startswith(uri):
             continue
         localpart = identifier[len(uri):]
         if LOCALPART_RE.match(localpart):
             return '%s:%s' % (prefix, localpart)
     return identifier
Пример #4
0
    def __init__(self, url, update_url=None, namespaces={}, preferred_media_types=()):
        """
        Set up the endpoint wrapper.

        ``url`` and ``update_url`` are stored as given. ``namespaces`` is
        applied on top of a copy of NS (the argument itself is only read).
        ``preferred_media_types`` is used to build the Accept header; when it
        is empty, ``self._supported_media_types`` is used instead.
        """
        self._url = url
        self._update_url = update_url

        merged_namespaces = NS.copy()
        merged_namespaces.update(namespaces)
        self._namespaces = merged_namespaces

        self._cache = defaultdict(dict)

        media_types = preferred_media_types or self._supported_media_types
        self._accept_header = self._get_accept_header(media_types)
Пример #5
0
    def get(self, request):
        """
        Handle a GET request for the SPARQL query form.

        The form is bound to ``request.REQUEST`` only when a ``query``
        parameter is present. When the form validates, the query is run via
        ``self.perform_query`` and the response is delegated to
        ``_sparql_results_view`` or ``_graph_view`` according to
        ``results.format_type``. A QueryError is reported through the
        ``error`` and ``status_code`` context entries instead of propagating.

        Raises AssertionError if the results carry an unexpected format type.
        """
        query = request.REQUEST.get('query')
        form = SparqlQueryForm(request.REQUEST if query else None,
                               formats=self.get_format_choices())

        context = self.context
        context.update({
            'namespaces': sorted(NS.items()),
            'form': form,
            'store': self.store
        })

        if form.is_valid():
            try:
                results = self.perform_query(
                    request, query, form.cleaned_data['common_prefixes'])
            # "except X as e" works on Python 2.6+ and 3, unlike "except X, e".
            except QueryError as e:
                context['error'] = e.message
                context['status_code'] = e.status_code
            else:
                context['additional_headers'][
                    'X-Humfrey-SPARQL-Duration'] = results.duration

                context['queries'] = [results.query]
                context['duration'] = results.duration
                context['results'] = results

                if results.format_type == 'sparql-results':
                    return self._sparql_results_view(request, context)
                elif results.format_type == 'graph':
                    return self._graph_view(request, context)
                raise AssertionError("Unexpected format type: {0}".format(
                    results.format_type))
        # NOTE(review): when the form is unbound/invalid or the query failed,
        # nothing is returned in the code visible here -- confirm whether a
        # final render call belongs after this point.
Пример #6
0
    def get(self, request):
        """
        Handle a GET request for the SPARQL query form, with throttling.

        ``self.get_user_privileges(request)`` supplies the privileges mapping;
        when its ``throttle`` entry is truthy the throttle/deny thresholds and
        the intensity decay are exposed as additional response headers. When
        the form validates, the query is run via ``self.perform_query``, which
        returns ``(results, intensity)``; a non-None intensity is exposed as
        the ``X-Humfrey-SPARQL-Intensity`` header.

        ``urllib2.HTTPError`` and ``self.ConcurrentQueryException`` are
        reported through the ``error`` and ``status_code`` context entries
        rather than propagated.
        """
        privileges = self.get_user_privileges(request)

        query = request.REQUEST.get('query')
        form = SparqlQueryForm(request.REQUEST if query else None,
                               formats=self.get_format_choices())

        context = {
            'namespaces': sorted(NS.items()),
            'form': form,
            'store': self.store,
        }

        if privileges['throttle']:
            additional_headers = context['additional_headers'] = {
                'X-Humfrey-SPARQL-Throttle-Threshold': privileges['throttle_threshold'],
                'X-Humfrey-SPARQL-Deny-Threshold': privileges['deny_threshold'],
                'X-Humfrey-SPARQL-Intensity-Decay': privileges['intensity_decay'],
            }

        if form.is_valid():
            try:
                results, intensity = self.perform_query(request, query, form.cleaned_data['common_prefixes'], privileges)
                if intensity is not None:
                    # The headers dict only exists when throttling is on, so
                    # create it on demand rather than rely on the local name
                    # (which would be unbound for unthrottled users).
                    context.setdefault('additional_headers', {})[
                        'X-Humfrey-SPARQL-Intensity'] = intensity

            except urllib2.HTTPError as e:
                context['error'] = e.read()  # parse(e).find('.//pre').text
                context['status_code'] = e.code
            except self.ConcurrentQueryException:
                context['error'] = "You cannot perform more than one query at a time.\nPlease wait for your previous query to complete or time out first."
                context['status_code'] = 403
        # NOTE(review): no response is returned in the code visible here --
        # confirm against the full view.
Пример #7
0
    def execute(self, transform_manager):
        """
        Load the vocabulary for every namespace in NS.

        ``self.load_vocabulary`` is called once per (prefix, uri) pair. A
        failure for one vocabulary is logged with its traceback and does not
        stop the remaining ones from being attempted.
        """
        for prefix, uri in NS.iteritems():
            try:
                self.load_vocabulary(transform_manager, prefix, uri)
            except Exception:
                # Deliberately broad: this is best-effort per vocabulary, and
                # logger.exception records the traceback.
                logger.exception("Failed to load vocabulary: %r from %r", prefix, uri)
Пример #8
0
 def label2(self):
     """
     Return a ``prefix:localpart`` form of this resource's identifier.

     The first NS namespace whose URI is a prefix of the identifier, and
     whose remaining local part matches LOCALPART_RE, is used. If no
     namespace qualifies the identifier is returned unchanged.
     """
     identifier = self._identifier
     for prefix, uri in NS.iteritems():
         if not identifier.startswith(uri):
             continue
         localpart = identifier[len(uri):]
         if LOCALPART_RE.match(localpart):
             return '%s:%s' % (prefix, localpart)
     return identifier
Пример #9
0
    def get(self, request):
        """
        Handle a GET request for the SPARQL query form.

        The form is bound to ``request.REQUEST`` only when a ``query``
        parameter is present. When the form validates, the query is run via
        ``self.perform_query`` and the response is delegated to
        ``_sparql_results_view`` or ``_graph_view`` according to
        ``results.format_type``. A QueryError is reported through the
        ``error`` and ``status_code`` context entries instead of propagating.

        Raises AssertionError if the results carry an unexpected format type.
        """
        query = request.REQUEST.get('query')
        form = SparqlQueryForm(request.REQUEST if query else None,
                               formats=self.get_format_choices())

        context = self.context
        context.update({
            'namespaces': sorted(NS.items()),
            'form': form,
            'store': self.store
        })

        if form.is_valid():
            try:
                results = self.perform_query(request, query, form.cleaned_data['common_prefixes'])
            # "except X as e" works on Python 2.6+ and 3, unlike "except X, e".
            except QueryError as e:
                context['error'] = e.message
                context['status_code'] = e.status_code
            else:
                context['additional_headers']['X-Humfrey-SPARQL-Duration'] = results.duration

                context['queries'] = [results.query]
                context['duration'] = results.duration
                context['results'] = results

                if results.format_type == 'sparql-results':
                    return self._sparql_results_view(request, context)
                elif results.format_type == 'graph':
                    return self._graph_view(request, context)
                raise AssertionError("Unexpected format type: {0}".format(results.format_type))
        # NOTE(review): when the form is unbound/invalid or the query failed,
        # nothing is returned in the code visible here -- confirm whether a
        # final render call belongs after this point.
Пример #10
0
    def execute(self, transform_manager):
        """
        Load the vocabulary for every namespace in NS.

        ``self.load_vocabulary`` is called once per (prefix, uri) pair. A
        failure for one vocabulary is logged with its traceback and does not
        stop the remaining ones from being attempted.
        """
        for prefix, uri in NS.iteritems():
            try:
                self.load_vocabulary(transform_manager, prefix, uri)
            except Exception:
                # Deliberately broad: this is best-effort per vocabulary, and
                # logger.exception records the traceback.
                logger.exception("Failed to load vocabulary: %r from %r",
                                 prefix, uri)
Пример #11
0
    def __init__(self,
                 url,
                 update_url=None,
                 namespaces={},
                 preferred_media_types=()):
        """
        Set up the endpoint wrapper.

        ``url`` and ``update_url`` are stored as given. ``namespaces`` is
        applied on top of a copy of NS (the argument itself is only read).
        ``preferred_media_types`` is used to build the Accept header; when it
        is empty, ``self._supported_media_types`` is used instead.
        """
        self._url = url
        self._update_url = update_url

        merged_namespaces = NS.copy()
        merged_namespaces.update(namespaces)
        self._namespaces = merged_namespaces

        self._cache = defaultdict(dict)

        media_types = preferred_media_types or self._supported_media_types
        self._accept_header = self._get_accept_header(media_types)
Пример #12
0
    def _iter(self, sparql_results_type, fields, bindings, boolean, triples):
        queue = Queue.Queue()
        graph = Graph()
        for prefix, namespace_uri in NS.iteritems():
            graph.namespace_manager.bind(prefix, namespace_uri)

        triples = list(triples)
        with statsd.timer('humfrey.streaming.rdflib-serializer.add-triples.' + self.plugin_name):
            graph += triples
        serializer_thread = threading.Thread(target=self._serialize_to_queue,
                                             args=(graph, queue))

        with statsd.timer('humfrey.streaming.rdflib-serializer.serialize.' + self.plugin_name):
            serializer_thread.start()
            while True:
                type, value = queue.get()
                if type == 'data':
                    yield value
                elif type == 'sentinel':
                    break
                elif type == 'exception':
                    raise value[0], value[1], value[2]
            serializer_thread.join()
Пример #13
0
    def get(self):
        """
        Build (once) and return an in-memory object representing the stream.

        Depending on the SPARQL results type the result is a SparqlResultList,
        the value returned by ``get_boolean()``, or a ConjunctiveGraph with the
        NS prefixes bound. The object is cached on the instance, so subsequent
        calls return the same object. If this stream has ``query`` or
        ``duration`` attributes they are copied onto the result.

        Raises AssertionError for an unrecognised results type.
        """
        if self._cached_get is not None:
            return self._cached_get

        results_type = self.get_sparql_results_type()
        if results_type == 'resultset':
            result = SparqlResultList(self.get_fields(), self.get_bindings())
        elif results_type == 'boolean':
            result = self.get_boolean()
        elif results_type == 'graph':
            result = rdflib.ConjunctiveGraph()
            for prefix, namespace_uri in NS.iteritems():
                result.namespace_manager.bind(prefix, namespace_uri)
            result += self.get_triples()
        else:
            message = "Unexpected results type: {0}".format(results_type)
            raise AssertionError(message)

        # Cache before annotating, so the cache is populated even if copying
        # an attribute onto the result fails.
        self._cached_get = result
        for name in ('query', 'duration'):
            if hasattr(self, name):
                setattr(self._cached_get, name, getattr(self, name))
        return self._cached_get
Пример #14
0
    def get(self, request):
        """
        Render the document describing the resource named by the request URL.

        The request URL is mapped back to a subject URI (and optional format)
        with ``doc_backward``; when ``self.check_canonical`` is set, a
        non-canonical URL is answered with a permanent redirect. A graph about
        the subject is then built from the queries returned by
        ``subject.get_queries()``, annotated with source and license triples
        for the document, and rendered either in the explicitly requested
        format or via ``self.render()``.

        Raises Http404 if the URL cannot be resolved to a valid IRI, the
        resource has no types, the resulting graph is empty, or
        ``render_to_format`` raises KeyError.
        """
        additional_headers = {}

        # Apache helpfully(!?) unescapes encoded hash characters. If we get one
        # we know that the browser sent a '%23' (or else it would have stripped
        # it as a fragment identifier). We replace it with a '%23' so that our
        # URI canonicalisation doesn't get stuck in an endless redirect loop.
        doc_url = request.build_absolute_uri().replace('#', '%23')

        # Given a URL 'http://example.org/doc/foo.bar' we check whether 'foo'
        # has a type (ergo 'bar' is a format), and if not we assume that
        # 'foo.bar' is part of the URI
        for formats in (None, ()):
            uri, format, is_local = doc_backward(doc_url, formats)
            if uri and not IRI.match(uri):
                raise Http404("Invalid IRI")
            if not uri:
                logger.debug("Could not resolve URL to a URI: %r", doc_url)
                raise Http404("Could not resolve URL to a URI")
            types = self.get_types(uri)
            if types:
                break
            doc_url = doc_url.rsplit('.', 1)[0]
        else:
            logger.debug(
                "Resource has no type, so is probably not known in these parts: %r",
                uri)
            raise Http404(
                "Resource has no type, so is probably not known in these parts"
            )

        expected_doc_url = urlparse.urljoin(
            doc_url, doc_forward(uri, request, format=format, described=True))
        if self.check_canonical and expected_doc_url != doc_url:
            logger.debug(
                "Request for a non-canonical doc URL (%r) for %r, redirecting to %r",
                doc_url, uri, expected_doc_url)
            return HttpResponsePermanentRedirect(expected_doc_url)

        doc_uri = rdflib.URIRef(
            doc_forward(uri, request, format=None, described=True))

        self.context.update({
            'subject_uri': uri,
            'doc_uri': doc_uri,
            'format': format,
            'types': types,
            'show_follow_link': not is_local,
            'no_index': not is_local,
            'additional_headers': additional_headers,
        })

        subject_uri, doc_uri = self.context['subject_uri'], self.context[
            'doc_uri']
        types = self.context['types']

        queries, graph = [], rdflib.ConjunctiveGraph()
        for prefix, namespace_uri in NS.iteritems():
            graph.namespace_manager.bind(prefix, namespace_uri)

        graph += ((subject_uri, NS.rdf.type, t) for t in types)
        subject = Resource(subject_uri, graph, self.endpoint)

        for query in subject.get_queries():
            graph += self.endpoint.query(query)
            queries.append(query)

        licenses, datasets = set(), set()
        for graph_name in graph.subjects(NS['ov'].describes):
            graph.add((doc_uri, NS['dcterms'].source, graph_name))
            licenses.update(graph.objects(graph_name, NS['dcterms'].license))
            datasets.update(graph.objects(graph_name, NS['void'].inDataset))

        # Only assert a license for the document when it is unambiguous.
        if len(licenses) == 1:
            for license_uri in licenses:
                graph.add((doc_uri, NS['dcterms'].license, license_uri))

        if not graph:
            logger.debug("Graph for %r was empty; 404ing", uri)
            raise Http404("Graph was empty")

        self.template_name = subject.template_name or self.template_name
        for template_override in self.template_overrides:
            # Each override is (template name, type, type, ...). A separate
            # name is used so the resource's own ``types`` is not clobbered.
            tn, override_types = template_override[0], template_override[1:]
            subject_types = set(
                subject._graph.objects(subject._identifier, NS.rdf.type))
            if subject_types & set(map(expand, override_types)):
                self.template_name = tn
                break

        # The comprehension variables must not be called ``uri``: on Python 2
        # list-comprehension variables leak into the enclosing scope and would
        # overwrite the subject URI that is still needed below.
        self.context.update({
            'graph': graph,
            'subject': subject,
            'licenses': [Resource(license_uri, graph, self.endpoint)
                         for license_uri in licenses],
            'datasets': [Resource(dataset_uri, graph, self.endpoint)
                         for dataset_uri in datasets],
            'queries': map(self.endpoint.normalize_query, queries),
            'template_name': self.template_name,
        })

        self.set_renderers()

        for doc_rdf_processor in self._doc_rdf_processors:
            additional_context = doc_rdf_processor(self.request, self.context)
            if additional_context:
                self.context.update(additional_context)

        # If no format was given explicitly (i.e. format parameter or
        # extension) we inspect the Content-Type header.
        if not format:
            if request.renderers:
                format = request.renderers[0].format
                expected_doc_url = doc_forward(uri,
                                               request,
                                               format=format,
                                               described=True)
        if expected_doc_url != doc_url:
            additional_headers['Content-Location'] = expected_doc_url

        # NOTE: This getattrs every attr on subject, so would force
        # memoization on any cached attributes. We call it as late as
        # possible to make sure the graph won't change afterwards, making
        # those cached results incorrect.
        self.conneg += subject

        # ``self.context['format']`` still holds the format taken from the
        # URL, whereas ``format`` may by now have come from the renderers.
        if self.context['format']:
            try:
                return self.render_to_format(format=format)
            except KeyError:
                raise Http404
        else:
            return self.render()
Пример #15
0
    def get(self, request):
        """
        Render the document describing the resource named by the request URL.

        The request URL is mapped back to a subject URI (and optional format)
        with ``doc_backward``; when ``self.check_canonical`` is set, a
        non-canonical URL is answered with a permanent redirect. A graph about
        the subject is built from the queries returned by
        ``subject.get_queries()``, annotated with source and license triples
        for the document, passed through each ``self._doc_rdf_processors``
        entry, and rendered either in the explicitly requested format or via
        ``self.render``.

        Raises Http404 if the URL cannot be resolved to a URI, the resource
        has no types, the resulting graph is empty, or ``render_to_format``
        raises KeyError.
        """
        additional_headers = {}
        doc_url = request.build_absolute_uri()

        uri, format, is_local = doc_backward(doc_url, set(self._renderers_by_format))
        if not uri:
            logger.debug("Could not resolve URL to a URI: %r", doc_url)
            raise Http404

        expected_doc_url = doc_forward(uri, request, format=format, described=True)

        types = self.get_types(uri)
        if not types:
            logger.debug("Resource has no type, so is probably not known in these parts: %r", uri)
            raise Http404

        if self.check_canonical and expected_doc_url != doc_url:
            logger.debug("Request for a non-canonical doc URL (%r) for %r, redirecting to %r", doc_url, uri, expected_doc_url)
            return HttpResponsePermanentRedirect(expected_doc_url)

        # If no format was given explicitly (i.e. format parameter or
        # extension) we inspect the Content-Type header.
        if not format:
            renderers = self.get_renderers(request)
            if renderers:
                format = renderers[0].format
                expected_doc_url = doc_forward(uri, request, format=format, described=True)
        if expected_doc_url != doc_url:
            additional_headers['Content-Location'] = expected_doc_url

        doc_uri = rdflib.URIRef(doc_forward(uri, request, format=None, described=True))

        context = {
            'subject_uri': uri,
            'doc_uri': doc_uri,
            'format': format,
            'types': types,
            'show_follow_link': not is_local,
            'no_index': not is_local,
            'additional_headers': additional_headers,
        }

        subject_uri, doc_uri = context['subject_uri'], context['doc_uri']
        types = context['types']

        queries, graph = [], rdflib.ConjunctiveGraph()
        for prefix, namespace_uri in NS.iteritems():
            graph.namespace_manager.bind(prefix, namespace_uri)

        graph += ((subject_uri, NS.rdf.type, t) for t in types)
        subject = Resource(subject_uri, graph, self.endpoint)

        for query in subject.get_queries():
            graph += self.endpoint.query(query)
            queries.append(query)

        licenses, datasets = set(), set()
        for graph_name in graph.subjects(NS['ov'].describes):
            graph.add((doc_uri, NS['dcterms'].source, graph_name))
            licenses.update(graph.objects(graph_name, NS['dcterms'].license))
            datasets.update(graph.objects(graph_name, NS['void'].inDataset))

        # Only assert a license for the document when it is unambiguous.
        if len(licenses) == 1:
            for license_uri in licenses:
                graph.add((doc_uri, NS['dcterms'].license, license_uri))

        if not graph:
            logger.debug("Graph for %r was empty; 404ing", uri)
            raise Http404

        for doc_rdf_processor in self._doc_rdf_processors:
            additional_context = doc_rdf_processor(request=request,
                                                   graph=graph,
                                                   doc_uri=doc_uri,
                                                   subject_uri=subject_uri,
                                                   subject=subject,
                                                   endpoint=self.endpoint,
                                                   renderers=self._renderers)
            if additional_context:
                context.update(additional_context)

        # Distinct comprehension variables: on Python 2 they leak into the
        # enclosing scope, so reusing ``uri`` would overwrite the subject URI.
        context.update({
            'graph': graph,
            'subject': subject,
            'licenses': [Resource(license_uri, graph, self.endpoint) for license_uri in licenses],
            'datasets': [Resource(dataset_uri, graph, self.endpoint) for dataset_uri in datasets],
            'queries': queries,
            'template_name': subject.template_name,
        })

        template_name = subject.template_name or self.template_name
        for template_override in self.template_overrides:
            # Each override is (template name, type, type, ...). A separate
            # name is used so the resource's own ``types`` is not clobbered.
            tn, override_types = template_override[0], template_override[1:]
            # Diagnostics go to the logger rather than stdout.
            logger.debug("Template override %r for types %r; subject types: %r",
                         tn, override_types, subject.get_all('rdf:type'))
            if set(subject._graph.objects(subject._identifier, NS.rdf.type)) & set(map(expand, override_types)):
                template_name = tn
                break

        if context['format']:
            try:
                return self.render_to_format(request, context, template_name, format)
            except KeyError:
                raise Http404
        else:
            return self.render(request, context, template_name)
Пример #16
0
 def __init__(self, url, update_url=None, namespaces={}):
     """
     Set up the endpoint wrapper.

     ``url`` and ``update_url`` are stored as given. ``namespaces`` is
     applied on top of a copy of NS (the argument itself is only read).
     """
     self._url = url
     self._update_url = update_url

     merged_namespaces = NS.copy()
     merged_namespaces.update(namespaces)
     self._namespaces = merged_namespaces

     self._cache = defaultdict(dict)
Пример #17
0
    def get(self, request):
        """
        Render the document describing the resource named by the request URL.

        The request URL is mapped back to a subject URI (and optional format)
        with ``doc_backward``; when ``self.check_canonical`` is set, a
        non-canonical URL is answered with a permanent redirect. A graph about
        the subject is then built from the queries returned by
        ``subject.get_queries()``, annotated with source and license triples
        for the document, and rendered either in the explicitly requested
        format or via ``self.render()``.

        Raises Http404 if the URL cannot be resolved to a valid IRI, the
        resource has no types, the resulting graph is empty, or
        ``render_to_format`` raises KeyError.
        """
        additional_headers = {}

        # Apache helpfully(!?) unescapes encoded hash characters. If we get one
        # we know that the browser sent a '%23' (or else it would have stripped
        # it as a fragment identifier). We replace it with a '%23' so that our
        # URI canonicalisation doesn't get stuck in an endless redirect loop.
        doc_url = request.build_absolute_uri().replace('#', '%23')

        # Given a URL 'http://example.org/doc/foo.bar' we check whether 'foo'
        # has a type (ergo 'bar' is a format), and if not we assume that
        # 'foo.bar' is part of the URI
        for formats in (None, ()):
            uri, format, is_local = doc_backward(doc_url, formats)
            if uri and not IRI.match(uri):
                raise Http404("Invalid IRI")
            if not uri:
                logger.debug("Could not resolve URL to a URI: %r", doc_url)
                raise Http404("Could not resolve URL to a URI")
            types = self.get_types(uri)
            if types:
                break
            doc_url = doc_url.rsplit('.', 1)[0]
        else:
            logger.debug("Resource has no type, so is probably not known in these parts: %r", uri)
            raise Http404("Resource has no type, so is probably not known in these parts")

        expected_doc_url = urlparse.urljoin(doc_url, doc_forward(uri, request, format=format, described=True))
        if self.check_canonical and expected_doc_url != doc_url:
            logger.debug("Request for a non-canonical doc URL (%r) for %r, redirecting to %r", doc_url, uri, expected_doc_url)
            return HttpResponsePermanentRedirect(expected_doc_url)

        doc_uri = rdflib.URIRef(doc_forward(uri, request, format=None, described=True))

        self.context.update({
            'subject_uri': uri,
            'doc_uri': doc_uri,
            'format': format,
            'types': types,
            'show_follow_link': not is_local,
            'no_index': not is_local,
            'additional_headers': additional_headers,
        })

        subject_uri, doc_uri = self.context['subject_uri'], self.context['doc_uri']
        types = self.context['types']

        queries, graph = [], rdflib.ConjunctiveGraph()
        for prefix, namespace_uri in NS.iteritems():
            graph.namespace_manager.bind(prefix, namespace_uri)

        graph += ((subject_uri, NS.rdf.type, t) for t in types)
        subject = Resource(subject_uri, graph, self.endpoint)

        for query in subject.get_queries():
            graph += self.endpoint.query(query)
            queries.append(query)

        licenses, datasets = set(), set()
        for graph_name in graph.subjects(NS['ov'].describes):
            graph.add((doc_uri, NS['dcterms'].source, graph_name))
            licenses.update(graph.objects(graph_name, NS['dcterms'].license))
            datasets.update(graph.objects(graph_name, NS['void'].inDataset))

        # Only assert a license for the document when it is unambiguous.
        if len(licenses) == 1:
            for license_uri in licenses:
                graph.add((doc_uri, NS['dcterms'].license, license_uri))

        if not graph:
            logger.debug("Graph for %r was empty; 404ing", uri)
            raise Http404("Graph was empty")

        self.template_name = subject.template_name or self.template_name
        for template_override in self.template_overrides:
            # Each override is (template name, type, type, ...). A separate
            # name is used so the resource's own ``types`` is not clobbered.
            tn, override_types = template_override[0], template_override[1:]
            if set(subject._graph.objects(subject._identifier, NS.rdf.type)) & set(map(expand, override_types)):
                self.template_name = tn
                break

        # The comprehension variables must not be called ``uri``: on Python 2
        # list-comprehension variables leak into the enclosing scope and would
        # overwrite the subject URI that is still needed below.
        self.context.update({
            'graph': graph,
            'subject': subject,
            'licenses': [Resource(license_uri, graph, self.endpoint) for license_uri in licenses],
            'datasets': [Resource(dataset_uri, graph, self.endpoint) for dataset_uri in datasets],
            'queries': map(self.endpoint.normalize_query, queries),
            'template_name': self.template_name,
        })

        self.set_renderers()

        for doc_rdf_processor in self._doc_rdf_processors:
            additional_context = doc_rdf_processor(self.request, self.context)
            if additional_context:
                self.context.update(additional_context)

        # If no format was given explicitly (i.e. format parameter or
        # extension) we inspect the Content-Type header.
        if not format:
            if request.renderers:
                format = request.renderers[0].format
                expected_doc_url = doc_forward(uri, request, format=format, described=True)
        if expected_doc_url != doc_url:
            additional_headers['Content-Location'] = expected_doc_url

        # NOTE: This getattrs every attr on subject, so would force
        # memoization on any cached attributes. We call it as late as
        # possible to make sure the graph won't change afterwards, making
        # those cached results incorrect.
        self.conneg += subject

        # ``self.context['format']`` still holds the format taken from the
        # URL, whereas ``format`` may by now have come from the renderers.
        if self.context['format']:
            try:
                return self.render_to_format(format=format)
            except KeyError:
                raise Http404
        else:
            return self.render()