示例#1
0
    def unresolve(self, url):
        """
        Split a URL into the pieces our serving views operate on.

        Handy whenever we need to know precisely which file a URL points at.
        A throwaway GET request is built from the URL's host and path so the
        same helpers used while serving docs can be reused here.

        :param url: the full URL to take apart.
        :returns: an ``UnresolvedObject`` built from the project, language
            slug, version slug, filename and URL fragment, or ``None`` when
            the host lookup hands back a response-like object (anything with
            a ``status_code``) instead of a project slug.
        """
        url_parts = urlparse(url)
        # Keep only what comes before the first ``:`` of the netloc (drops the port).
        host, _sep, _port = url_parts.netloc.partition(':')
        url_path = url_parts.path

        # TODO: Make this not depend on the request object,
        # but instead move all this logic here working on strings.
        fake_request = RequestFactory().get(path=url_path, HTTP_HOST=host)
        slug_or_response = map_host_to_project_slug(fake_request)
        fake_request.host_project_slug = slug_or_response

        # The host lookup returned a response instead of a slug: nothing to unresolve.
        if hasattr(slug_or_response, 'status_code'):
            return None

        _view, _args, route_kwargs = url_resolve(url_path, urlconf='readthedocs.proxito.urls')

        host_version_slug = ServeDocsMixin().get_version_from_host(
            fake_request,
            route_kwargs.get('version_slug'),
        )

        project, language, version, doc_file = _get_project_data_from_request(  # noqa
            fake_request,
            project_slug=slug_or_response,
            subproject_slug=route_kwargs.get('subproject_slug'),
            lang_slug=route_kwargs.get('lang_slug'),
            version_slug=host_version_slug,
            filename=route_kwargs.get('filename', ''),
        )

        # Our backend storage has no notion of directory indexes, so an empty
        # or directory-like filename has to point at its ``index.html``.
        points_to_directory = not doc_file or doc_file.endswith('/')
        if points_to_directory:
            doc_file += 'index.html'

        log.info(
            'Unresolver parsed: '
            'url=%s project=%s lang_slug=%s version_slug=%s filename=%s',
            url,
            project.slug,
            language,
            version,
            doc_file,
        )
        return UnresolvedObject(project, language, version, doc_file, url_parts.fragment)
示例#2
0
    def unresolve_from_request(self, request, path):
        """
        Unresolve a path with the help of an existing request.

        ``path`` may be a complete URL; only its path and fragment are used,
        because the project is taken from ``request.host_project_slug``.

        :param request: request expected to carry ``host_project_slug``.
        :param path: path (or full URL) to unresolve.
        :returns: an ``UnresolvedObject``, or ``None`` when the request has
            no (or an empty) ``host_project_slug``.
        """
        url_parts = urlparse(path)
        url_path = url_parts.path

        host_slug = getattr(request, 'host_project_slug', None)
        if not host_slug:
            return None

        _view, _args, route_kwargs = url_resolve(url_path, urlconf='readthedocs.proxito.urls')

        host_version_slug = ServeDocsMixin().get_version_from_host(
            request,
            route_kwargs.get('version_slug'),
        )

        project, language, version, doc_file = _get_project_data_from_request(  # noqa
            request,
            project_slug=host_slug,
            subproject_slug=route_kwargs.get('subproject_slug'),
            lang_slug=route_kwargs.get('lang_slug'),
            version_slug=host_version_slug,
            filename=route_kwargs.get('filename', ''),
        )

        # Our backend storage has no notion of directory indexes, so an empty
        # or directory-like filename has to point at its ``index.html``.
        points_to_directory = not doc_file or doc_file.endswith('/')
        if points_to_directory:
            doc_file += 'index.html'

        log.debug(
            'Unresolver parsed.',
            project_slug=project.slug,
            lang_slug=language,
            version_slug=version,
            filename=doc_file,
        )
        return UnresolvedObject(project, language, version, doc_file, url_parts.fragment)
示例#3
0
    def get(self, request, proxito_path, template_name='404.html'):
        """
        Handler for 404 pages on subdomains.

        This does a couple things, in this order:

        * Handles directory indexing for URLs that don't end in a slash
        * Handles directory indexing for README.html (for now)
        * Checks the redirects that apply to the requested path
        * Handles custom 404 serving

        For 404's, first search for a 404 page in the current version, then
        continue with the default version. If none of them is found,
        ``Http404`` is raised (presumably so the default Read the Docs page
        is rendered further up the stack).

        :param request: the request that ended up in a 404.
        :param proxito_path: path that could not be served; it does not
            include the query params.
        :param template_name: not used by this method's body.
        :returns: a redirect to the index/README file, whatever
            ``get_redirect_response`` returns for a matching redirect, or a
            response with status 404 carrying the custom 404 page.
        :raises Http404: when no custom 404 page is found.
        """
        # pylint: disable=too-many-locals
        log.info('Executing 404 handler. proxito_path=%s', proxito_path)

        # Parse the URL using the normal urlconf, so we get proper subdomain/translation data
        _, __, kwargs = url_resolve(
            proxito_path,
            urlconf='readthedocs.proxito.urls',
        )

        version_slug = kwargs.get('version_slug')
        version_slug = self.get_version_from_host(request, version_slug)
        final_project, lang_slug, version_slug, filename = _get_project_data_from_request(  # noqa
            request,
            project_slug=kwargs.get('project_slug'),
            subproject_slug=kwargs.get('subproject_slug'),
            lang_slug=kwargs.get('lang_slug'),
            version_slug=version_slug,
            filename=kwargs.get('filename', ''),
        )

        storage_root_path = final_project.get_storage_path(
            type_='html',
            version_slug=version_slug,
            include_file=False,
            version_type=self.version_type,
        )

        # First, check for dirhtml with slash
        for tryfile in ('index.html', 'README.html'):
            storage_filename_path = build_media_storage.join(
                storage_root_path,
                # Strip leading slashes so the candidate stays relative to
                # the storage root.
                f'{filename}/{tryfile}'.lstrip('/'),
            )
            log.debug(
                'Trying index filename: project=%s version=%s, file=%s',
                final_project.slug,
                version_slug,
                storage_filename_path,
            )
            if build_media_storage.exists(storage_filename_path):
                log.info(
                    'Redirecting to index file: project=%s version=%s, storage_path=%s',
                    final_project.slug,
                    version_slug,
                    storage_filename_path,
                )
                parts = urlparse(proxito_path)
                if tryfile == 'README.html':
                    new_path = parts.path.rstrip('/') + f'/{tryfile}'
                else:
                    new_path = parts.path.rstrip('/') + '/'

                # `proxito_path` doesn't include query params, so take them
                # from the request to keep the GET args in the redirect.
                query = urlparse(request.get_full_path()).query
                new_parts = parts._replace(
                    path=new_path,
                    query=query,
                )
                redirect_url = new_parts.geturl()

                # TODO: decide if we need to check for infinite redirect here
                # (from URL == to URL)
                return HttpResponseRedirect(redirect_url)

        # ``redirect_filename`` is the path without ``/<lang>/<version>`` and
        # without query, starting with a ``/``. This matches our old logic:
        # https://github.com/readthedocs/readthedocs.org/blob/4b09c7a0ab45cd894c3373f7f07bad7161e4b223/readthedocs/redirects/utils.py#L60
        # We parse ``filename`` to remove the query from it.
        #
        # We can't check for lang and version here to decide if we need to add
        # the ``/`` or not because ``/install.html`` is a valid path to use as
        # redirect and does not include lang and version on it. It should be
        # fine always adding the ``/`` to the beginning.
        redirect_filename = '/' + urlparse(filename).path.lstrip('/')

        # Check and perform redirects on 404 handler
        # NOTE: this redirect check must be done after trying files like
        # ``index.html`` and ``README.html`` to emulate the behavior we had when
        # serving directly from NGINX without passing through Python.
        redirect_path, http_status = self.get_redirect(
            project=final_project,
            lang_slug=lang_slug,
            version_slug=version_slug,
            filename=redirect_filename,
            full_path=proxito_path,
        )
        if redirect_path and http_status:
            try:
                return self.get_redirect_response(request, redirect_path,
                                                  proxito_path, http_status)
            except InfiniteRedirectException:
                # Continue with our normal 404 handling in this case
                pass

        # If that doesn't work, attempt to serve the 404 of the current version (version_slug)
        # Secondly, try to serve the 404 page for the default version
        # (project.get_default_version())
        doc_type = (Version.objects.filter(project=final_project,
                                           slug=version_slug).values_list(
                                               'documentation_type',
                                               flat=True).first())
        versions = [(version_slug, doc_type)]
        default_version_slug = final_project.get_default_version()
        if default_version_slug != version_slug:
            default_version_doc_type = (Version.objects.filter(
                project=final_project,
                slug=default_version_slug).values_list('documentation_type',
                                                       flat=True).first())
            versions.append((default_version_slug, default_version_doc_type))

        for version_slug_404, doc_type_404 in versions:
            # Skip versions ``allowed_user`` rejects for this request.
            if not self.allowed_user(request, final_project, version_slug_404):
                continue

            storage_root_path = final_project.get_storage_path(
                type_='html',
                version_slug=version_slug_404,
                include_file=False,
                version_type=self.version_type,
            )
            tryfiles = ['404.html']
            # SPHINX_HTMLDIR is the only builder
            # that could output a 404/index.html file.
            if doc_type_404 == SPHINX_HTMLDIR:
                tryfiles.append('404/index.html')
            for tryfile in tryfiles:
                storage_filename_path = build_media_storage.join(
                    storage_root_path, tryfile)
                if build_media_storage.exists(storage_filename_path):
                    log.info(
                        'Serving custom 404.html page: [project: %s] [version: %s]',
                        final_project.slug,
                        version_slug_404,
                    )
                    # Read inside ``with`` so the storage file is closed once
                    # its content has been copied into the response.
                    with build_media_storage.open(storage_filename_path) as custom_404:
                        resp = HttpResponse(custom_404.read())
                    resp.status_code = 404
                    return resp

        raise Http404('No custom 404 page found.')
示例#4
0
    def get(self, request, proxito_path, template_name='404.html'):
        """
        Handler for 404 pages on subdomains.

        This does a couple things, in this order:

        * Checks the redirects that apply to the requested path
        * Handles directory indexing for URLs that don't end in a slash
        * Handles directory indexing for README.html (for now)
        * Handles custom 404 serving

        For 404's, first search for a 404 page in the current version, then continues
        with the default version and finally, if none of them are found, the Read
        the Docs default page (Maze Found) is rendered by Django and served.

        :param request: the request that ended up in a 404.
        :param proxito_path: path that could not be served.
        :param template_name: template rendered when no custom 404 page exists.
        :returns: whatever ``get_redirect_response`` returns for a matching
            redirect, a redirect to the index/README file, or a response
            with status 404 (custom page or rendered ``template_name``).
        """
        # pylint: disable=too-many-locals

        # Parse the URL using the normal urlconf, so we get proper subdomain/translation data
        _, __, kwargs = url_resolve(proxito_path,
                                    urlconf='readthedocs.proxito.urls')
        final_project, lang_slug, version_slug, filename = _get_project_data_from_request(  # noqa
            request,
            project_slug=kwargs.get('project_slug'),
            subproject_slug=kwargs.get('subproject_slug'),
            lang_slug=kwargs.get('lang_slug'),
            version_slug=kwargs.get('version_slug'),
            filename=kwargs.get('filename', ''),
        )

        # Check and perform redirects on 404 handler
        redirect_path, http_status = self.get_redirect(
            final_project,
            lang_slug,
            version_slug,
            filename,
            request.path,
        )
        if redirect_path and http_status:
            return self.get_redirect_response(request, redirect_path,
                                              http_status)

        storage_root_path = final_project.get_storage_path(
            type_='html',
            version_slug=version_slug,
            include_file=False,
        )
        storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()

        # ``os.path.join`` throws away every earlier component when it meets
        # an absolute one, so a ``filename`` starting with ``/`` would drop
        # ``storage_root_path``. Keep it relative.
        relative_filename = filename.lstrip('/')

        # First, check for dirhtml with slash
        for tryfile in ('index.html', 'README.html'):
            storage_filename_path = os.path.join(storage_root_path,
                                                 relative_filename, tryfile)
            log.debug(
                'Trying index filename: project=%s version=%s, file=%s',
                final_project.slug,
                version_slug,
                storage_filename_path,
            )
            if storage.exists(storage_filename_path):
                log.info(
                    'Redirecting to index file: project=%s version=%s, url=%s',
                    final_project.slug,
                    version_slug,
                    storage_filename_path,
                )
                # Use urlparse so that we maintain GET args in our redirect
                parts = urlparse(proxito_path)
                # Build the URL path with plain string operations (``os.path``
                # is for filesystem paths) and strip trailing slashes first so
                # the redirect never contains ``//``.
                base_path = parts.path.rstrip('/')
                if tryfile == 'README.html':
                    new_path = base_path + '/' + tryfile
                else:
                    new_path = base_path + '/'
                new_parts = parts._replace(path=new_path)
                resp = HttpResponseRedirect(new_parts.geturl())
                return resp

        # If that doesn't work, attempt to serve the 404 of the current version (version_slug)
        # Secondly, try to serve the 404 page for the default version
        # (project.get_default_version())
        versions_404 = [version_slug]
        default_version_slug = final_project.get_default_version()
        # Looking at the same version twice would only repeat the same checks.
        if default_version_slug != version_slug:
            versions_404.append(default_version_slug)

        for version_slug_404 in versions_404:
            # The storage path only depends on the version, not on ``tryfile``.
            storage_root_path = final_project.get_storage_path(
                type_='html',
                version_slug=version_slug_404,
                include_file=False,
            )
            for tryfile in ('404.html', '404/index.html'):
                storage_filename_path = os.path.join(storage_root_path,
                                                     tryfile)
                if storage.exists(storage_filename_path):
                    log.debug(
                        'Serving custom 404.html page: [project: %s] [version: %s]',
                        final_project.slug,
                        version_slug_404,
                    )
                    # Read inside ``with`` so the storage file is closed once
                    # its content has been copied into the response.
                    with storage.open(storage_filename_path) as custom_404:
                        resp = HttpResponse(custom_404.read())
                    resp.status_code = 404
                    return resp

        # Finally, return the default 404 page generated by Read the Docs
        resp = render(request, template_name)
        resp.status_code = 404
        return resp
示例#5
0
    def get(self, request, proxito_path, template_name='404.html'):
        """
        Handler for 404 pages on subdomains.

        This does a couple things, in this order:

        * Handles directory indexing for URLs that don't end in a slash
        * Handles directory indexing for README.html (for now)
        * Checks the redirects that apply to the requested path
        * Handles custom 404 serving

        For 404's, first search for a 404 page in the current version, then continues
        with the default version and finally, if none of them are found, the Read
        the Docs default page (Maze Found) is rendered by Django and served.

        :param request: the request that ended up in a 404.
        :param proxito_path: path that could not be served.
        :param template_name: template rendered when no custom 404 page exists.
        :returns: a redirect to the index/README file, whatever
            ``get_redirect_response`` returns for a matching redirect, or a
            response with status 404 (custom page or rendered ``template_name``).
        """
        # pylint: disable=too-many-locals
        log.info('Executing 404 handler. proxito_path=%s', proxito_path)

        # Parse the URL using the normal urlconf, so we get proper subdomain/translation data
        _, __, kwargs = url_resolve(
            proxito_path,
            urlconf='readthedocs.proxito.urls',
        )
        final_project, lang_slug, version_slug, filename = _get_project_data_from_request(  # noqa
            request,
            project_slug=kwargs.get('project_slug'),
            subproject_slug=kwargs.get('subproject_slug'),
            lang_slug=kwargs.get('lang_slug'),
            version_slug=kwargs.get('version_slug'),
            filename=kwargs.get('filename', ''),
        )

        storage_root_path = final_project.get_storage_path(
            type_='html',
            version_slug=version_slug,
            include_file=False,
        )
        storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()

        # ``os.path.join`` throws away every earlier component when it meets
        # an absolute one, so a ``filename`` starting with ``/`` would drop
        # ``storage_root_path``. Keep it relative.
        relative_filename = filename.lstrip('/')

        # First, check for dirhtml with slash
        for tryfile in ('index.html', 'README.html'):
            storage_filename_path = os.path.join(storage_root_path,
                                                 relative_filename, tryfile)
            log.debug(
                'Trying index filename: project=%s version=%s, file=%s',
                final_project.slug,
                version_slug,
                storage_filename_path,
            )
            if storage.exists(storage_filename_path):
                log.info(
                    'Redirecting to index file: project=%s version=%s, storage_path=%s',
                    final_project.slug,
                    version_slug,
                    storage_filename_path,
                )
                # Use urlparse so that we maintain GET args in our redirect
                parts = urlparse(proxito_path)
                # Build the URL path with plain string operations (``os.path``
                # is for filesystem paths); stripping trailing slashes first
                # keeps the redirect free of ``//``.
                base_path = parts.path.rstrip('/')
                if tryfile == 'README.html':
                    new_path = base_path + '/' + tryfile
                else:
                    new_path = base_path + '/'
                new_parts = parts._replace(path=new_path)
                redirect_url = new_parts.geturl()

                # TODO: decide if we need to check for infinite redirect here
                # (from URL == to URL)
                return HttpResponseRedirect(redirect_url)

        # ``redirect_filename`` is the path without ``/<lang>/<version>`` and
        # without query, starting with a ``/``. This matches our old logic:
        # https://github.com/readthedocs/readthedocs.org/blob/4b09c7a0ab45cd894c3373f7f07bad7161e4b223/readthedocs/redirects/utils.py#L60
        # We parse ``filename`` to remove the query from it.
        #
        # We can't check for lang and version here to decide if we need to add
        # the ``/`` or not because ``/install.html`` is a valid path to use as
        # redirect and does not include lang and version on it. It should be
        # fine always adding the ``/`` to the beginning.
        redirect_filename = '/' + urlparse(filename).path.lstrip('/')

        # Check and perform redirects on 404 handler
        # NOTE: this redirect check must be done after trying files like
        # ``index.html`` and ``README.html`` to emulate the behavior we had when
        # serving directly from NGINX without passing through Python.
        redirect_path, http_status = self.get_redirect(
            project=final_project,
            lang_slug=lang_slug,
            version_slug=version_slug,
            filename=redirect_filename,
            full_path=proxito_path,
        )
        if redirect_path and http_status:
            return self.get_redirect_response(request, redirect_path,
                                              proxito_path, http_status)

        # If that doesn't work, attempt to serve the 404 of the current version (version_slug)
        # Secondly, try to serve the 404 page for the default version
        # (project.get_default_version())
        versions_404 = [version_slug]
        default_version_slug = final_project.get_default_version()
        # Looking at the same version twice would only repeat the same checks.
        if default_version_slug != version_slug:
            versions_404.append(default_version_slug)

        for version_slug_404 in versions_404:
            # The storage path only depends on the version, not on ``tryfile``.
            storage_root_path = final_project.get_storage_path(
                type_='html',
                version_slug=version_slug_404,
                include_file=False,
            )
            for tryfile in ('404.html', '404/index.html'):
                storage_filename_path = os.path.join(storage_root_path,
                                                     tryfile)
                if storage.exists(storage_filename_path):
                    log.debug(
                        'Serving custom 404.html page: [project: %s] [version: %s]',
                        final_project.slug,
                        version_slug_404,
                    )
                    # Read inside ``with`` so the storage file is closed once
                    # its content has been copied into the response.
                    with storage.open(storage_filename_path) as custom_404:
                        resp = HttpResponse(custom_404.read())
                    resp.status_code = 404
                    return resp

        # Finally, return the default 404 page generated by Read the Docs
        resp = render(request, template_name)
        resp.status_code = 404
        return resp