def get(self, request, project):
    """
    Serve ``/robots.txt`` for a project.

    If a ``robots.txt`` file exists in storage next to the HTML output of
    the project's default version, that file is served. Otherwise the
    ``robots.txt`` template is rendered with the sitemap URL and the
    project's hidden paths.

    :param request: incoming HTTP request
    :param project: project the ``robots.txt`` is served for
    :raises Http404: if the default version is private, not active
        or not built
    """
    default_slug = project.get_default_version()
    default_version = project.versions.get(slug=default_slug)

    # Nothing is served when the default version is private,
    # inactive or has not been built yet.
    is_private = default_version.privacy_level == constants.PRIVATE
    is_inactive = not default_version.active
    is_unbuilt = not default_version.built
    if is_private or is_inactive or is_unbuilt:
        raise Http404()

    html_root = project.get_storage_path(
        type_='html',
        version_slug=default_slug,
        include_file=False,
        version_type=self.version_type,
    )
    custom_robots = build_media_storage.join(html_root, 'robots.txt')

    if not build_media_storage.exists(custom_robots):
        # No user-provided file: fall back to the rendered template.
        sitemap_url = f'https://{project.subdomain()}/sitemap.xml'
        return render(
            request,
            'robots.txt',
            {
                'sitemap_url': sitemap_url,
                'hidden_paths': self._get_hidden_paths(project),
            },
            content_type='text/plain',
        )

    # Serve the user's file, using the storage URL
    # stripped of its scheme and domain.
    storage_url = build_media_storage.url(custom_robots)
    internal_path = urlparse(storage_url)._replace(scheme='', netloc='').geturl()
    return self._serve_docs(
        request,
        final_project=project,
        path=internal_path,
    )
def get(
        self,
        request,
        project_slug=None,
        subproject_slug=None,
        subproject_slash=None,
        lang_slug=None,
        version_slug=None,
        filename='',
):  # noqa
    """
    Work out which documentation file the parsed URL refers to and serve it.

    Depending on the request this either returns a redirect (canonical or
    system redirect), an "unauthed" response, or the response produced by
    ``self._serve_docs`` for the file's storage path.

    ``subproject_slash`` tells whether the subproject URL ended with a slash,
    which decides between serving docs and redirecting to add the ``/``.

    :raises Http404: when a language or version is missing from the URL of a
        project with versions (and this is not an external version)
    """
    version_slug = self.get_version_from_host(request, version_slug)
    resolved = _get_project_data_from_request(  # noqa
        request,
        project_slug=project_slug,
        subproject_slug=subproject_slug,
        lang_slug=lang_slug,
        version_slug=version_slug,
        filename=filename,
    )
    final_project, lang_slug, version_slug, filename = resolved

    log.debug(
        'Serving docs: project=%s, subproject=%s, lang_slug=%s, version_slug=%s, filename=%s',
        final_project.slug,
        subproject_slug,
        lang_slug,
        version_slug,
        filename,
    )

    # Requests flagged for canonicalizing (eg. HTTP -> HTTPS, or a redirect
    # to the canonical domain) are redirected first.
    if hasattr(request, 'canonicalize'):
        try:
            return self.canonical_redirect(request, final_project, version_slug, filename)
        except InfiniteRedirectException:
            # Redirecting would break things here, so keep serving instead.
            pass

    # Redirect from ``/`` when the project is not single version.
    missing_lang = lang_slug is None
    # External versions/builds always have a version,
    # because it is taken from the host name.
    missing_or_external_version = (
        version_slug is None or hasattr(request, 'external_domain')
    )
    empty_filename = filename == ''
    multi_version = not final_project.single_version
    if missing_lang and missing_or_external_version and empty_filename and multi_version:
        return self.system_redirect(request, final_project, lang_slug, version_slug, filename)

    # `/projects/subproject` URL redirection:
    # there _is_ a subproject_slug but no subproject_slash.
    slashless_subproject = subproject_slug and not subproject_slash
    if final_project.single_version and empty_filename and slashless_subproject:
        return self.system_redirect(request, final_project, lang_slug, version_slug, filename)

    incomplete_url = lang_slug is None or version_slug is None
    if incomplete_url and multi_version and self.version_type != EXTERNAL:
        log.warning(
            'Invalid URL for project with versions. url=%s, project=%s',
            filename,
            final_project.slug,
        )
        raise Http404('Invalid URL for project with versions')

    # TODO: un-comment when ready to perform redirect here
    # redirect_path, http_status = self.get_redirect(
    #     final_project,
    #     lang_slug,
    #     version_slug,
    #     filename,
    #     request.path,
    # )
    # if redirect_path and http_status:
    #     return self.get_redirect_response(request, redirect_path, http_status)

    # Permission check: users that are not allowed get the unauthed response.
    if not self.allowed_user(request, final_project, version_slug):
        return self.get_unauthed_response(request, final_project)

    html_root = final_project.get_storage_path(
        type_='html',
        version_slug=version_slug,
        include_file=False,
        version_type=self.version_type,
    )

    # An empty ``filename`` means serving from ``/``.
    path = build_media_storage.join(html_root, filename.lstrip('/'))

    # The backend storage does not support directory indexes, so
    # ``index.html`` is appended when the path points to a directory.
    # This has to happen before calling ``storage.url`` because the
    # Signature and Expire time is calculated per file.
    if path[-1] == '/':
        path += 'index.html'

    # NOTE: calling ``.url`` will remove the trailing slash
    storage_url = build_media_storage.url(path, http_method=request.method)

    # Drop scheme and domain so NGINX can perform an internal redirect.
    final_url = urlparse(storage_url)._replace(scheme='', netloc='').geturl()

    return self._serve_docs(
        request,
        final_project=final_project,
        version_slug=version_slug,
        path=final_url,
    )
def get(
        self,
        request,
        project_slug=None,
        type_=None,
        version_slug=None,
        lang_slug=None,
        subproject_slug=None,
):
    """
    Download a specific piece of media.

    An auth check is performed when serving in private mode.

    Two URL styles reach this view: old-style URLs (download from the
    dashboard), where every argument comes from the URL, and new-style URLs
    (download from the same domain as docs), where ``project_slug`` is not
    part of the URL and the project is taken from the request. Once the final
    ``version`` is known, both styles are handled the same way.

    .. warning::

        This is linked directly from the HTML pages.
        It should only care about the Version permissions,
        not the actual Project permissions.
    """
    if not self.same_domain_url:
        # Old-style URL: all the arguments come from the URL.
        version = get_object_or_404(
            Version.objects.public(user=request.user),
            project__slug=project_slug,
            slug=version_slug,
        )
    else:
        # New-style URL: the ``project`` comes from the request,
        # the rest of the arguments come from the URL.
        final_project, lang_slug, version_slug, _filename = _get_project_data_from_request(  # noqa
            request,
            project_slug=None,
            subproject_slug=subproject_slug,
            lang_slug=lang_slug,
            version_slug=version_slug,
        )

        if not self.allowed_user(request, final_project, version_slug):
            return self.get_unauthed_response(request, final_project)

        # ``.public`` is not used in this lookup because access was
        # already granted by ``.allowed_user``.
        version = get_object_or_404(
            final_project.versions,
            slug=version_slug,
        )

    # Report the media download to analytics - sensitive data is anonymized
    analytics_event.delay(
        event_category='Build Media',
        event_action=f'Download {type_}',
        event_label=str(version),
        ua=request.META.get('HTTP_USER_AGENT'),
        uip=get_client_ip(request),
    )

    media_path = version.project.get_storage_path(
        type_=type_,
        version_slug=version_slug,
        version_type=version.type,
    )

    # Drop scheme and domain so NGINX can perform an internal redirect.
    storage_url = build_media_storage.url(media_path)
    internal_path = urlparse(storage_url)._replace(scheme='', netloc='').geturl()

    return self._serve_docs(
        request,
        final_project=version.project,
        version_slug=version.slug,
        path=internal_path,
        download=True,
    )
def _create_intersphinx_data(version, commit, build):
    """
    Create intersphinx data for this version.

    Reads the ``objects.inv`` inventory stored with the version's HTML output
    and creates one ``SphinxDomain`` object per inventory entry whose document
    has a matching ``HTMLFile`` for this build. Nothing is done for versions
    that are not of Sphinx type or that have no ``objects.inv`` in storage.

    :param version: Version instance
    :param commit: Commit that updated path
    :param build: Build id
    :returns: ``None``
    """
    if not version.is_sphinx_type:
        return

    html_storage_path = version.project.get_storage_path(
        type_='html', version_slug=version.slug, include_file=False
    )
    json_storage_path = version.project.get_storage_path(
        type_='json', version_slug=version.slug, include_file=False
    )

    object_file = build_media_storage.join(html_storage_path, 'objects.inv')
    if not build_media_storage.exists(object_file):
        log.debug('No objects.inv, skipping intersphinx indexing.')
        return

    type_file = build_media_storage.join(json_storage_path, 'readthedocs-sphinx-domain-names.json')
    types = {}
    titles = {}
    if build_media_storage.exists(type_file):
        try:
            # Close the storage file as soon as it has been parsed.
            with build_media_storage.open(type_file) as type_fh:
                data = json.load(type_fh)
            types = data['types']
            titles = data['titles']
        except Exception:
            # Best effort: display names are optional, keep going without them.
            log.exception('Exception parsing readthedocs-sphinx-domain-names.json')

    # These classes are copied from Sphinx
    # https://github.com/sphinx-doc/sphinx/blob/d79d041f4f90818e0b495523fdcc28db12783caf/sphinx/ext/intersphinx.py#L400-L403  # noqa
    class MockConfig:
        intersphinx_timeout = None
        tls_verify = False
        user_agent = None

    class MockApp:
        srcdir = ''
        config = MockConfig()

        def warn(self, msg):
            log.warning('Sphinx MockApp.', msg=msg)

    # Re-create all objects from the new build of the version
    object_file_url = build_media_storage.url(object_file)
    if object_file_url.startswith('/'):
        # Filesystem backed storage simply prepends MEDIA_URL to the path to get the URL
        # This can cause an issue if MEDIA_URL is not fully qualified
        object_file_url = settings.RTD_INTERSPHINX_URL + object_file_url

    invdata = intersphinx.fetch_inventory(MockApp(), '', object_file_url)
    # Guard the inventory itself (not its ``items()``), so that a missing
    # inventory is treated as empty instead of raising ``AttributeError``.
    for key, value in sorted((invdata or {}).items()):
        domain, _type = key.split(':', 1)
        for name, einfo in sorted(value.items()):
            # project, version, url, display_name
            # ('Sphinx', '1.7.9', 'faq.html#epub-faq', 'Epub info')
            try:
                url = einfo[2]
                if '#' in url:
                    doc_name, anchor = url.split(
                        '#',
                        # The anchor can contain ``#`` characters
                        maxsplit=1
                    )
                else:
                    doc_name, anchor = url, ''
                display_name = einfo[3]
            except Exception:
                log.exception(
                    'Error while getting sphinx domain information. Skipping...',
                    project_slug=version.project.slug,
                    version_slug=version.slug,
                    # f-string so the actual domain and name end up in the log.
                    sphinx_domain=f'{domain}->{name}',
                )
                continue

            # HACK: This is done because the difference between
            # ``sphinx.builders.html.StandaloneHTMLBuilder``
            # and ``sphinx.builders.dirhtml.DirectoryHTMLBuilder``.
            # They both have different ways of generating HTML Files,
            # and therefore the doc_name generated is different.
            # More info on: http://www.sphinx-doc.org/en/master/usage/builders/index.html#builders
            # Also see issue: https://github.com/readthedocs/readthedocs.org/issues/5821
            if doc_name.endswith('/'):
                doc_name += 'index.html'

            html_file = HTMLFile.objects.filter(
                project=version.project, version=version,
                path=doc_name, build=build,
            ).first()

            if not html_file:
                log.debug(
                    'HTMLFile object not found.',
                    project_slug=version.project.slug,
                    version_slug=version.slug,
                    build_id=build,
                    doc_name=doc_name
                )

                # Don't create Sphinx Domain objects
                # if the HTMLFile object is not found.
                continue

            SphinxDomain.objects.create(
                project=version.project,
                version=version,
                html_file=html_file,
                domain=domain,
                name=name,
                display_name=display_name,
                type=_type,
                type_display=types.get(f'{domain}:{_type}', ''),
                doc_name=doc_name,
                doc_display=titles.get(doc_name, ''),
                anchor=anchor,
                commit=commit,
                build=build,
            )