示例#1
0
    def get(self, request):
        """Serve the document describing the resource named by the request URL.

        The request URL is mapped back to a subject URI with
        ``doc_backward``; requests for a non-canonical document URL are
        redirected when ``self.check_canonical`` is set. Otherwise a graph is
        assembled from the results of the subject's queries, the context is
        populated and the response is rendered.

        Returns:
            An ``HttpResponsePermanentRedirect`` to the expected document URL,
            or the result of ``self.render_to_format()`` / ``self.render()``.

        Raises:
            Http404: if the URL cannot be resolved to a URI, the URI does not
                match ``IRI``, the resource has no types, the graph is empty,
                or ``render_to_format`` raises ``KeyError``.
        """
        additional_headers = {}

        # Apache helpfully(!?) unescapes encoded hash characters. If we get one
        # we know that the browser sent a '%23' (or else it would have stripped
        # it as a fragment identifier). We replace it with a '%23' so that our
        # URI canonicalisation doesn't get stuck in an endless redirect loop.
        doc_url = request.build_absolute_uri().replace('#', '%23')

        # Given a URL 'http://example.org/doc/foo.bar' we check whether 'foo'
        # has a type (ergo 'bar' is a format), and if not we assume that
        # 'foo.bar' is part of the URI.
        for formats in (None, ()):
            uri, format, is_local = doc_backward(doc_url, formats)
            if uri and not IRI.match(uri):
                raise Http404("Invalid IRI")
            if not uri:
                logger.debug("Could not resolve URL to a URI: %r", doc_url)
                raise Http404("Could not resolve URL to a URI")
            types = self.get_types(uri)
            if types:
                break
            doc_url = doc_url.rsplit('.', 1)[0]
        else:
            # Neither interpretation of the URL produced a typed resource.
            logger.debug("Resource has no type, so is probably not known in these parts: %r", uri)
            raise Http404("Resource has no type, so is probably not known in these parts")

        expected_doc_url = urlparse.urljoin(doc_url, doc_forward(uri, request, format=format, described=True))
        if self.check_canonical and expected_doc_url != doc_url:
            logger.debug("Request for a non-canonical doc URL (%r) for %r, redirecting to %r", doc_url, uri, expected_doc_url)
            return HttpResponsePermanentRedirect(expected_doc_url)

        doc_uri = rdflib.URIRef(doc_forward(uri, request, format=None, described=True))

        # additional_headers is shared by reference: entries added further
        # down are visible through the context as well.
        self.context.update({
            'subject_uri': uri,
            'doc_uri': doc_uri,
            'format': format,
            'types': types,
            'show_follow_link': not is_local,
            'no_index': not is_local,
            'additional_headers': additional_headers,
        })

        subject_uri, doc_uri = self.context['subject_uri'], self.context['doc_uri']
        types = self.context['types']

        queries, graph = [], rdflib.ConjunctiveGraph()
        for prefix, namespace_uri in NS.iteritems():
            graph.namespace_manager.bind(prefix, namespace_uri)

        # Seed the graph with the already-known types before running the
        # subject's own queries against the endpoint.
        graph += ((subject_uri, NS.rdf.type, t) for t in types)
        subject = Resource(subject_uri, graph, self.endpoint)

        for query in subject.get_queries():
            graph += self.endpoint.query(query)
            queries.append(query)

        licenses, datasets = set(), set()
        for graph_name in graph.subjects(NS['ov'].describes):
            graph.add((doc_uri, NS['dcterms'].source, graph_name))
            licenses.update(graph.objects(graph_name, NS['dcterms'].license))
            datasets.update(graph.objects(graph_name, NS['void'].inDataset))

        # Only attach a license to the document when it is unambiguous.
        if len(licenses) == 1:
            for license_uri in licenses:
                graph.add((doc_uri, NS['dcterms'].license, license_uri))

        if not graph:
            logger.debug("Graph for %r was empty; 404ing", uri)
            raise Http404("Graph was empty")

        # The first override whose types intersect the subject's types wins.
        self.template_name = subject.template_name or self.template_name
        for template_override in self.template_overrides:
            override_name, override_types = template_override[0], template_override[1:]
            if set(subject._graph.objects(subject._identifier, NS.rdf.type)) & set(map(expand, override_types)):
                self.template_name = override_name
                break

        # NOTE: the comprehension variables below are deliberately not called
        # ``uri``. On Python 2 a list comprehension's loop variable leaks into
        # the enclosing scope, which would overwrite the subject URI that is
        # still needed for doc_forward() further down.
        self.context.update({
            'graph': graph,
            'subject': subject,
            'licenses': [Resource(license_uri, graph, self.endpoint) for license_uri in licenses],
            'datasets': [Resource(dataset_uri, graph, self.endpoint) for dataset_uri in datasets],
            'queries': map(self.endpoint.normalize_query, queries),
            'template_name': self.template_name,
        })

        self.set_renderers()

        for doc_rdf_processor in self._doc_rdf_processors:
            additional_context = doc_rdf_processor(self.request, self.context)
            if additional_context:
                self.context.update(additional_context)

        # If no format was given explicitly (i.e. format parameter or
        # extension) we take the format of the first of request.renderers.
        if not format:
            if request.renderers:
                format = request.renderers[0].format
                expected_doc_url = doc_forward(uri, request, format=format, described=True)
        if expected_doc_url != doc_url:
            additional_headers['Content-Location'] = expected_doc_url

        # NOTE: This getattrs every attr on subject, so would force
        # memoization on any cached attributes. We call it as late as
        # possible to make sure the graph won't change afterwards, making
        # those cached results incorrect.
        self.conneg += subject

        # context['format'] still holds the explicitly requested format (if
        # any); it is not updated by the renderer fallback above.
        if self.context['format']:
            try:
                return self.render_to_format(format=format)
            except KeyError:
                raise Http404
        else:
            return self.render()
示例#2
0
    def get(self, request):
        """Serve the document describing the resource named by the request URL.

        The request URL is mapped back to a subject URI with
        ``doc_backward``; requests for a non-canonical document URL are
        redirected when ``self.check_canonical`` is set. Otherwise a graph is
        assembled from the results of the subject's queries, a context dict
        is built and the response is rendered.

        Returns:
            An ``HttpResponsePermanentRedirect`` to the expected document URL,
            or the result of ``self.render_to_format()`` / ``self.render()``.

        Raises:
            Http404: if the URL cannot be resolved to a URI, the resource has
                no types, the graph is empty, or ``render_to_format`` raises
                ``KeyError``.
        """
        additional_headers = {}
        doc_url = request.build_absolute_uri()

        uri, format, is_local = doc_backward(doc_url, set(self._renderers_by_format))
        if not uri:
            logger.debug("Could not resolve URL to a URI: %r", doc_url)
            raise Http404

        expected_doc_url = doc_forward(uri, request, format=format, described=True)

        types = self.get_types(uri)
        if not types:
            logger.debug("Resource has no type, so is probably not known in these parts: %r", uri)
            raise Http404

        if self.check_canonical and expected_doc_url != doc_url:
            logger.debug("Request for a non-canonical doc URL (%r) for %r, redirecting to %r", doc_url, uri, expected_doc_url)
            return HttpResponsePermanentRedirect(expected_doc_url)

        # If no format was given explicitly (i.e. format parameter or
        # extension) we take the format of the first renderer returned by
        # get_renderers() for this request.
        if not format:
            renderers = self.get_renderers(request)
            if renderers:
                format = renderers[0].format
                expected_doc_url = doc_forward(uri, request, format=format, described=True)
        if expected_doc_url != doc_url:
            additional_headers['Content-Location'] = expected_doc_url

        doc_uri = rdflib.URIRef(doc_forward(uri, request, format=None, described=True))

        context = {
            'subject_uri': uri,
            'doc_uri': doc_uri,
            'format': format,
            'types': types,
            'show_follow_link': not is_local,
            'no_index': not is_local,
            'additional_headers': additional_headers,
        }

        subject_uri, doc_uri = context['subject_uri'], context['doc_uri']
        types = context['types']

        queries, graph = [], rdflib.ConjunctiveGraph()
        for prefix, namespace_uri in NS.iteritems():
            graph.namespace_manager.bind(prefix, namespace_uri)

        # Seed the graph with the already-known types before running the
        # subject's own queries against the endpoint.
        graph += ((subject_uri, NS.rdf.type, t) for t in types)
        subject = Resource(subject_uri, graph, self.endpoint)

        for query in subject.get_queries():
            graph += self.endpoint.query(query)
            queries.append(query)

        licenses, datasets = set(), set()
        for graph_name in graph.subjects(NS['ov'].describes):
            graph.add((doc_uri, NS['dcterms'].source, graph_name))
            licenses.update(graph.objects(graph_name, NS['dcterms'].license))
            datasets.update(graph.objects(graph_name, NS['void'].inDataset))

        # Only attach a license to the document when it is unambiguous.
        if len(licenses) == 1:
            for license_uri in licenses:
                graph.add((doc_uri, NS['dcterms'].license, license_uri))

        if not graph:
            logger.debug("Graph for %r was empty; 404ing", uri)
            raise Http404

        for doc_rdf_processor in self._doc_rdf_processors:
            additional_context = doc_rdf_processor(request=request,
                                                   graph=graph,
                                                   doc_uri=doc_uri,
                                                   subject_uri=subject_uri,
                                                   subject=subject,
                                                   endpoint=self.endpoint,
                                                   renderers=self._renderers)
            if additional_context:
                context.update(additional_context)

        # The comprehension variables are deliberately not called ``uri``: on
        # Python 2 a list comprehension's loop variable leaks into the
        # enclosing scope and would overwrite the subject URI.
        # NOTE(review): 'template_name' here is subject.template_name, not the
        # override-resolved name computed below -- confirm this is intended.
        context.update({
            'graph': graph,
            'subject': subject,
            'licenses': [Resource(license_uri, graph, self.endpoint) for license_uri in licenses],
            'datasets': [Resource(dataset_uri, graph, self.endpoint) for dataset_uri in datasets],
            'queries': queries,
            'template_name': subject.template_name,
        })

        # The first override whose types intersect the subject's types wins.
        template_name = subject.template_name or self.template_name
        for template_override in self.template_overrides:
            override_name, override_types = template_override[0], template_override[1:]
            # Diagnostic output goes through the logger rather than stdout.
            logger.debug("Template override %r for types %r; subject has types %r",
                         override_name, override_types, subject.get_all('rdf:type'))
            if set(subject._graph.objects(subject._identifier, NS.rdf.type)) & set(map(expand, override_types)):
                template_name = override_name
                break

        if context['format']:
            try:
                return self.render_to_format(request, context, template_name, format)
            except KeyError:
                raise Http404
        else:
            return self.render(request, context, template_name)
示例#3
0
 def testUnicodeBackward(self):
     # Each TESTS entry unpacks to a (uri, url) pair; only entries whose URI
     # is a unicode object are exercised here.
     for expected, doc_url in self.TESTS:
         if not isinstance(expected, unicode):
             continue
         # The first element of doc_backward()'s result must equal the
         # expected URI wrapped in a URIRef.
         resolved = doc_backward(doc_url)[0]
         self.assertEqual(resolved, rdflib.URIRef(expected))
示例#4
0
    def get(self, request):
        """Serve the document describing the resource named by the request URL.

        The request URL is mapped back to a subject URI with
        ``doc_backward``; requests for a non-canonical document URL are
        redirected when ``self.check_canonical`` is set. Otherwise a graph is
        assembled from the results of the subject's queries, the context is
        populated and the response is rendered.

        Returns:
            An ``HttpResponsePermanentRedirect`` to the expected document URL,
            or the result of ``self.render_to_format()`` / ``self.render()``.

        Raises:
            Http404: if the URL cannot be resolved to a URI, the URI does not
                match ``IRI``, the resource has no types, the graph is empty,
                or ``render_to_format`` raises ``KeyError``.
        """
        additional_headers = {}

        # Apache helpfully(!?) unescapes encoded hash characters. If we get one
        # we know that the browser sent a '%23' (or else it would have stripped
        # it as a fragment identifier). We replace it with a '%23' so that our
        # URI canonicalisation doesn't get stuck in an endless redirect loop.
        doc_url = request.build_absolute_uri().replace('#', '%23')

        # Given a URL 'http://example.org/doc/foo.bar' we check whether 'foo'
        # has a type (ergo 'bar' is a format), and if not we assume that
        # 'foo.bar' is part of the URI.
        for formats in (None, ()):
            uri, format, is_local = doc_backward(doc_url, formats)
            if uri and not IRI.match(uri):
                raise Http404("Invalid IRI")
            if not uri:
                logger.debug("Could not resolve URL to a URI: %r", doc_url)
                raise Http404("Could not resolve URL to a URI")
            types = self.get_types(uri)
            if types:
                break
            doc_url = doc_url.rsplit('.', 1)[0]
        else:
            # Neither interpretation of the URL produced a typed resource.
            logger.debug(
                "Resource has no type, so is probably not known in these parts: %r",
                uri)
            raise Http404(
                "Resource has no type, so is probably not known in these parts"
            )

        expected_doc_url = urlparse.urljoin(
            doc_url, doc_forward(uri, request, format=format, described=True))
        if self.check_canonical and expected_doc_url != doc_url:
            logger.debug(
                "Request for a non-canonical doc URL (%r) for %r, redirecting to %r",
                doc_url, uri, expected_doc_url)
            return HttpResponsePermanentRedirect(expected_doc_url)

        doc_uri = rdflib.URIRef(
            doc_forward(uri, request, format=None, described=True))

        # additional_headers is shared by reference: entries added further
        # down are visible through the context as well.
        self.context.update({
            'subject_uri': uri,
            'doc_uri': doc_uri,
            'format': format,
            'types': types,
            'show_follow_link': not is_local,
            'no_index': not is_local,
            'additional_headers': additional_headers,
        })

        subject_uri, doc_uri = self.context['subject_uri'], self.context[
            'doc_uri']
        types = self.context['types']

        queries, graph = [], rdflib.ConjunctiveGraph()
        for prefix, namespace_uri in NS.iteritems():
            graph.namespace_manager.bind(prefix, namespace_uri)

        # Seed the graph with the already-known types before running the
        # subject's own queries against the endpoint.
        graph += ((subject_uri, NS.rdf.type, t) for t in types)
        subject = Resource(subject_uri, graph, self.endpoint)

        for query in subject.get_queries():
            graph += self.endpoint.query(query)
            queries.append(query)

        licenses, datasets = set(), set()
        for graph_name in graph.subjects(NS['ov'].describes):
            graph.add((doc_uri, NS['dcterms'].source, graph_name))
            licenses.update(graph.objects(graph_name, NS['dcterms'].license))
            datasets.update(graph.objects(graph_name, NS['void'].inDataset))

        # Only attach a license to the document when it is unambiguous.
        if len(licenses) == 1:
            for license_uri in licenses:
                graph.add((doc_uri, NS['dcterms'].license, license_uri))

        if not graph:
            logger.debug("Graph for %r was empty; 404ing", uri)
            raise Http404("Graph was empty")

        # The first override whose types intersect the subject's types wins.
        self.template_name = subject.template_name or self.template_name
        for template_override in self.template_overrides:
            override_name = template_override[0]
            override_types = template_override[1:]
            if set(subject._graph.objects(
                    subject._identifier, NS.rdf.type)) & set(map(
                        expand, override_types)):
                self.template_name = override_name
                break

        # NOTE: the comprehension variables below are deliberately not called
        # ``uri``. On Python 2 a list comprehension's loop variable leaks into
        # the enclosing scope, which would overwrite the subject URI that is
        # still needed for doc_forward() further down.
        self.context.update({
            'graph':
            graph,
            'subject':
            subject,
            'licenses': [
                Resource(license_uri, graph, self.endpoint)
                for license_uri in licenses
            ],
            'datasets': [
                Resource(dataset_uri, graph, self.endpoint)
                for dataset_uri in datasets
            ],
            'queries':
            map(self.endpoint.normalize_query, queries),
            'template_name':
            self.template_name,
        })

        self.set_renderers()

        for doc_rdf_processor in self._doc_rdf_processors:
            additional_context = doc_rdf_processor(self.request, self.context)
            if additional_context:
                self.context.update(additional_context)

        # If no format was given explicitly (i.e. format parameter or
        # extension) we take the format of the first of request.renderers.
        if not format:
            if request.renderers:
                format = request.renderers[0].format
                expected_doc_url = doc_forward(uri,
                                               request,
                                               format=format,
                                               described=True)
        if expected_doc_url != doc_url:
            additional_headers['Content-Location'] = expected_doc_url

        # NOTE: This getattrs every attr on subject, so would force
        # memoization on any cached attributes. We call it as late as
        # possible to make sure the graph won't change afterwards, making
        # those cached results incorrect.
        self.conneg += subject

        # context['format'] still holds the explicitly requested format (if
        # any); it is not updated by the renderer fallback above.
        if self.context['format']:
            try:
                return self.render_to_format(format=format)
            except KeyError:
                raise Http404
        else:
            return self.render()
示例#5
0
 def testUnicodeBackward(self):
     # Each TESTS entry unpacks to a (uri, url) pair; only entries whose URI
     # is a unicode object are exercised here.
     for expected_uri, doc_url in self.TESTS:
         if not isinstance(expected_uri, unicode):
             continue
         # Backward mapping: the URL must resolve to the expected URI.
         self.assertRelativeEqual(doc_backward(doc_url)[0], expected_uri)
         # Round trip: mapping the resolved URI forward again must give the
         # URL we started from.
         round_tripped = doc_forward(doc_backward(doc_url)[0], described=True)
         self.assertRelativeEqual(round_tripped, doc_url)