def serve_coll_page(self, environ, coll='$root'):
    """Render and serve a collection's search page (search.html).

    :param dict environ: The WSGI environment dictionary for the request
    :param str coll: The name of the collection to serve the search page for
    :return: The WbResponse containing the rendered search page
    :rtype: WbResponse
    """
    if not self.is_valid_coll(coll):
        # Unknown collection: delegate to the not-found handler.
        self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll))

    self.setup_paths(environ, coll)

    metadata = self.get_metadata(coll)

    view = BaseInsertView(self.rewriterapp.jinja_env, 'search.html')

    # WSGI servers may omit SCRIPT_NAME when it is empty (PEP 3333), so fall
    # back to '' instead of handing ``None`` to the template as the prefix.
    wb_prefix = environ.get('SCRIPT_NAME') or ''
    if wb_prefix:
        wb_prefix += '/'

    content = view.render_to_string(environ,
                                    wb_prefix=wb_prefix,
                                    metadata=metadata,
                                    coll=coll)

    return WbResponse.text_response(content,
                                    content_type='text/html; charset="utf-8"')
def proxy_fetch(self, env, url):
    """Proxy mode only endpoint that handles OPTIONS requests and CORS
    fetches for the Preservation Worker.

    Due to normal cross-origin browser restrictions in proxy mode, the auto
    fetch worker cannot access the CSS rules of cross-origin style sheets and
    must re-fetch them in a manner that is CORS safe. This endpoint
    facilitates that by fetching the stylesheets for the auto fetch worker
    and then responding with their contents.

    :param dict env: The WSGI environment dictionary; ``REQUEST_URI`` and
        ``PATH_INFO`` are rewritten in place before the content is served
    :param str url: The URL of the resource to be fetched (the value passed
        in is replaced by one derived from ``REQUEST_URI``)
    :return: WbResponse that is either the response to an OPTIONS request or
        the result of fetching url
    :rtype: WbResponse
    """
    if not self.is_proxy_enabled(env):
        # we are not in proxy mode so just respond with forbidden
        return WbResponse.text_response(
            'proxy mode must be enabled to use this endpoint',
            status='403 Forbidden')

    if env.get('REQUEST_METHOD') == 'OPTIONS':
        return WbResponse.options_response(env)

    # ensure full URL
    request_url = env['REQUEST_URI']

    # Replace with /id_ so we do not get rewritten. Only the first
    # occurrence (the endpoint marker) is swapped: a target URL that itself
    # contains '/proxy-fetch' must reach serve_content unmodified.
    url = request_url.replace('/proxy-fetch', '/id_', 1)

    # update WSGI environment object
    env['REQUEST_URI'] = self.proxy_coll + url
    env['PATH_INFO'] = env['PATH_INFO'].replace(
        '/proxy-fetch', self.proxy_coll + '/id_', 1)

    # make request using normal serve_content
    response = self.serve_content(env, self.proxy_coll, url)

    # Only WbResponse instances receive the access control headers; any
    # other kind of result is returned untouched.
    if isinstance(response, WbResponse):
        response.add_access_control_headers(env=env)

    return response
def proxy_fetch(self, env, url):
    """Proxy mode only endpoint that handles OPTIONS requests and CORS
    fetches for the Preservation Worker.

    Due to normal cross-origin browser restrictions in proxy mode, the auto
    fetch worker cannot access the CSS rules of cross-origin style sheets and
    must re-fetch them in a manner that is CORS safe. This endpoint
    facilitates that by fetching the stylesheets for the auto fetch worker
    and then responding with their contents.

    :param dict env: The WSGI environment dictionary; its ``REQUEST_URI`` and
        ``PATH_INFO`` entries are overwritten before serving
    :param str url: The URL of the resource to be fetched (recomputed from
        ``REQUEST_URI`` inside this method)
    :return: WbResponse that is either the response to an OPTIONS request or
        the result of fetching url
    :rtype: WbResponse
    """
    if not self.is_proxy_enabled(env):
        # we are not in proxy mode so just respond with forbidden
        return WbResponse.text_response('proxy mode must be enabled to use this endpoint',
                                        status='403 Forbidden')

    if env.get('REQUEST_METHOD') == 'OPTIONS':
        return WbResponse.options_response(env)

    endpoint = '/proxy-fetch'

    # ensure full URL
    request_url = env['REQUEST_URI']

    # Swap the endpoint marker for /id_ so the content is not rewritten.
    # count=1 keeps a '/proxy-fetch' that appears later, inside the fetched
    # URL itself, from being replaced as well.
    url = request_url.replace(endpoint, '/id_', 1)

    # update WSGI environment object
    env['REQUEST_URI'] = self.proxy_coll + url
    env['PATH_INFO'] = env['PATH_INFO'].replace(endpoint, self.proxy_coll + '/id_', 1)

    # make request using normal serve_content
    response = self.serve_content(env, self.proxy_coll, url)

    # Access control headers can only be added to WbResponse objects.
    if isinstance(response, WbResponse):
        response.add_access_control_headers(env=env)

    return response
def serve_cdx(self, environ, coll='$root'):
    """Run the upstream CDX query for a collection and stream back the result.

    :param dict environ: The WSGI environment dictionary for the request
    :param str coll: The name of the collection this CDX query is for
    :return: The WbResponse streaming the upstream CDX result, or a
        '400 Bad Request' text response carrying the error message if
        anything raises while making the request
    :rtype: WbResponse
    """
    url_template = self.rewriterapp.paths['cdx-server']

    #if coll == self.all_coll:
    #    coll = '*'

    cdx_url = url_template.format(coll=coll)

    query = environ.get('QUERY_STRING')
    if query:
        # Extend an existing query string with '&', otherwise start one.
        joiner = '&' if '?' in cdx_url else '?'
        cdx_url = cdx_url + joiner + query

    try:
        upstream = requests.get(cdx_url, stream=True)
        upstream_type = upstream.headers.get('Content-Type')
        return WbResponse.bin_stream(StreamIter(upstream.raw),
                                     content_type=upstream_type)
    except Exception as exc:
        return WbResponse.text_response('Error: ' + str(exc),
                                        status='400 Bad Request')
def serve_record(self, environ, coll='$root', url=''):
    """Serve ``url`` through ``serve_content`` with recording enabled.

    Collections listed by ``warcserver.list_fixed_routes()`` are rejected
    with a plain text error response instead.

    :param dict environ: The WSGI environment dictionary for the request
    :param str coll: The name of the collection to record into
    :param str url: The URL to serve and record
    :return: The result of ``serve_content(..., record=True)``, or a text
        response describing why recording was refused
    """
    fixed_routes = self.warcserver.list_fixed_routes()

    if coll not in fixed_routes:
        return self.serve_content(environ, coll, url, record=True)

    message = 'Error: Can Not Record Into Custom Collection "{0}"'.format(coll)
    return WbResponse.text_response(message)
def format_response(self, response, wb_url, full_prefix, is_timegate, is_proxy, timegate_closest_ts=None):
    """Wrap a non-WbResponse result as HTML and add memento links on 200s.

    A ``response`` that is not already a WbResponse is turned into a
    'text/html' text response. For non-framed replay the UTF-8 charset is
    appended to the content type; for framed replay the memento timestamp is
    taken from ``timegate_closest_ts`` or, failing that, ``wb_url.timestamp``.

    :param response: A WbResponse, or content to wrap in one
    :param wb_url: The URL object for the request
    :param full_prefix: Prefix forwarded to ``_add_memento_links``
    :param is_timegate: Whether the request is a timegate request
    :param is_proxy: Whether the request is served in proxy mode
    :param timegate_closest_ts: Optional timestamp preferred over
        ``wb_url.timestamp`` for framed replay
    :return: The (possibly newly created) WbResponse
    """
    memento_ts = None

    if not isinstance(response, WbResponse):
        if self.is_framed_replay(wb_url):
            content_type = 'text/html'
            memento_ts = timegate_closest_ts or wb_url.timestamp
        else:
            # if not replay outer frame, specify utf-8 charset
            content_type = 'text/html' + '; charset=utf-8'

        response = WbResponse.text_response(response, content_type=content_type)

    # Memento links are only attached when enabled and the status is a 200.
    if not (self.enable_memento and
            response.status_headers.statusline.startswith('200')):
        return response

    self._add_memento_links(wb_url.url,
                            full_prefix,
                            None,
                            memento_ts,
                            response.status_headers,
                            is_timegate,
                            is_proxy,
                            is_memento=not is_timegate)
    return response
def serve_coll_page(self, environ, coll='$root'):
    """Render and serve the search page (search.html) of a collection.

    :param dict environ: The WSGI environment dictionary for the request
    :param str coll: The name of the collection whose search page is served
    :return: The WbResponse containing the rendered search page
    :rtype: WbResponse
    """
    if not self.is_valid_coll(coll):
        self.raise_not_found(environ, 'coll_not_found', coll)

    self.setup_paths(environ, coll)

    coll_config = self.get_coll_config(coll)
    template_args = {
        'coll': coll,
        'coll_config': coll_config,
        'metadata': coll_config.get('metadata'),
    }

    search_view = BaseInsertView(self.rewriterapp.jinja_env, 'search.html')

    # A non-empty script name becomes the prefix with a trailing slash.
    script_name = environ.get('SCRIPT_NAME', '')
    template_args['wb_prefix'] = script_name + '/' if script_name else script_name

    html = search_view.render_to_string(environ, **template_args)

    return WbResponse.text_response(html,
                                    content_type='text/html; charset="utf-8"')
def serve_cdx(self, environ, coll='$root'):
    """Run the upstream CDX query for a collection and stream back the result.

    The request's query string, and then ``limit=<query_limit>`` when
    ``self.query_limit`` is set, are appended to the CDX server URL.

    :param dict environ: The WSGI environment dictionary for the request
    :param str coll: The name of the collection this CDX query is for
    :return: The WbResponse streaming the upstream CDX result, or a
        '400 Bad Request' text response carrying the error message if
        anything raises while making the request
    :rtype: WbResponse
    """
    def _with_param(url, param):
        # Extend an existing query string with '&', otherwise start one.
        return url + ('&' if '?' in url else '?') + param

    # if coll == self.all_coll:
    #    coll = '*'

    cdx_url = self.rewriterapp.paths['cdx-server'].format(coll=coll)

    query = environ.get('QUERY_STRING')
    if query:
        cdx_url = _with_param(cdx_url, query)

    limit = self.query_limit
    if limit:
        cdx_url = _with_param(cdx_url, 'limit=' + str(limit))

    try:
        upstream = requests.get(cdx_url, stream=True)
        upstream_type = upstream.headers.get('Content-Type')
        return WbResponse.bin_stream(StreamIter(upstream.raw),
                                     content_type=upstream_type)
    except Exception as exc:
        return WbResponse.text_response('Error: ' + str(exc),
                                        status='400 Bad Request')
def _not_found_response(self, environ, url):
    """Render the not-found view for ``url`` as a '404 Not Found' HTML response.

    :param dict environ: The WSGI environment dictionary for the request
    :param str url: The URL passed to the not-found template
    :return: WbResponse with status '404 Not Found' and type 'text/html'
    """
    body = self.not_found_view.render_to_string(
        environ,
        url=url,
        err_msg="Not Found",
    )
    return WbResponse.text_response(body,
                                    content_type='text/html',
                                    status='404 Not Found')
def _error_response(self, environ, wbe):
    """Render the error view for an exception object as an HTML response.

    :param dict environ: The WSGI environment dictionary for the request
    :param wbe: The error object; its ``status()``, ``url``, ``msg`` and
        ``status_code`` are used to build the response
    :return: WbResponse of type 'text/html' with the status from ``wbe``
    """
    status_line = wbe.status()

    body = self.error_view.render_to_string(
        environ,
        err_msg=wbe.url,
        err_details=wbe.msg,
        err_status=wbe.status_code,
    )

    return WbResponse.text_response(body,
                                    content_type='text/html',
                                    status=status_line)
def _error_response(self, environ, msg='', details='', status='404 Not Found'):
    """Render the error view with a message and details as an HTML response.

    :param dict environ: The WSGI environment dictionary for the request
    :param str msg: The error message passed to the template as ``err_msg``
    :param str details: Extra details passed to the template as ``err_details``
    :param str status: The HTTP status line of the response
    :return: WbResponse of type 'text/html' with the given status
    """
    body = self.error_view.render_to_string(
        environ,
        err_msg=msg,
        err_details=details,
    )
    return WbResponse.text_response(body,
                                    content_type='text/html',
                                    status=status)
def test_resp_1():
    """A default text response is '200 OK', UTF-8 text/plain, with its length."""
    actual = vars(WbResponse.text_response('Test'))

    expected_headers = [
        ('Content-Type', 'text/plain; charset=utf-8'),
        ('Content-Length', '4'),
    ]
    expected = {
        'body': [b'Test'],
        'status_headers': StatusAndHeaders(protocol='',
                                           statusline='200 OK',
                                           headers=expected_headers),
    }

    assert actual == expected
def serve_record(self, environ, coll='$root', url=''):
    """Serve the content of a URL via ``serve_content`` with ``record=True``.

    Recording is refused, with a plain text error response, when ``coll`` is
    one of the fixed routes reported by the warcserver.

    :param dict environ: The WSGI environment dictionary for the request
    :param str coll: The name of the collection the record is to be served from
    :param str url: The URL for the corresponding record to be served
    :return: WbResponse containing the contents of the record/URL
    :rtype: WbResponse
    """
    recordable = coll not in self.warcserver.list_fixed_routes()
    if recordable:
        return self.serve_content(environ, coll, url, record=True)

    error_text = 'Error: Can Not Record Into Custom Collection "{0}"'.format(coll)
    return WbResponse.text_response(error_text)
def serve_home(self, environ):
    """Render and serve the home page (index.html).

    The template receives the fixed routes followed by the dynamic routes,
    plus the cached metadata looked up for the dynamic routes.

    :param dict environ: The WSGI environment dictionary for the request
    :return: The WbResponse containing the rendered home page
    """
    index_view = BaseInsertView(self.rewriterapp.jinja_env, 'index.html')

    fixed = self.warcserver.list_fixed_routes()
    dynamic = self.warcserver.list_dynamic_routes()

    html = index_view.render_to_string(
        environ,
        routes=fixed + dynamic,
        all_metadata=self.metadata_cache.get_all(dynamic),
    )

    return WbResponse.text_response(html,
                                    content_type='text/html; charset="utf-8"')
def test_resp_4():
    """``add_range(10, 4, 100)`` turns a text response into a 206 partial."""
    ranged = WbResponse.text_response('Test').add_range(10, 4, 100)
    actual = vars(ranged)

    expected_headers = [
        ('Content-Type', 'text/plain; charset=utf-8'),
        ('Content-Length', '4'),
        ('Content-Range', 'bytes 10-13/100'),
        ('Accept-Ranges', 'bytes'),
    ]
    expected = {
        'body': [b'Test'],
        'status_headers': StatusAndHeaders(protocol='',
                                           statusline='206 Partial Content',
                                           headers=expected_headers),
    }

    assert actual == expected
def serve_record(self, environ, coll='$root', url=''):
    """Serve the content of a URL via ``serve_content`` in record mode.

    If ``coll`` is among the warcserver's fixed routes, nothing is served and
    a plain text error response is returned instead.

    :param dict environ: The WSGI environment dictionary for the request
    :param str coll: The name of the collection the record is to be served from
    :param str url: The URL for the corresponding record to be served
    :return: WbResponse containing the contents of the record/URL
    :rtype: WbResponse
    """
    is_fixed_route = coll in self.warcserver.list_fixed_routes()

    if not is_fixed_route:
        return self.serve_content(environ, coll, url, record=True)

    refusal = 'Error: Can Not Record Into Custom Collection "{0}"'.format(coll)
    return WbResponse.text_response(refusal)
def serve_coll_page(self, environ, coll='$root'):
    """Render and serve the full search page (fullsearch.html) of a collection.

    :param dict environ: The WSGI environment dictionary for the request
    :param str coll: The name of the collection whose page is served
    :return: The WbResponse containing the rendered page
    """
    if not self.is_valid_coll(coll):
        not_found_msg = 'No handler for "/{0}"'.format(coll)
        self.raise_not_found(environ, not_found_msg)

    self.setup_paths(environ, coll)

    search_view = BaseInsertView(self.rewriterapp.jinja_env, 'fullsearch.html')

    # A non-empty script name becomes the prefix with a trailing slash.
    script_name = environ.get('SCRIPT_NAME', '')
    prefix = script_name + '/' if script_name else script_name

    html = search_view.render_to_string(environ, wb_prefix=prefix, coll=coll)

    return WbResponse.text_response(html,
                                    content_type='text/html; charset="utf-8"')
def lock_listing(self, environ):
    """Render and serve the locks page (locks.html).

    Builds a mapping with one entry per redis key matching
    ``SESH_LIST.format('*')``: the entry's name is the second ':'-separated
    field of that key, and its value is the key's set members with their
    first five characters dropped.

    :param dict environ: The WSGI environment dictionary; the session is read
        from ``environ[SESSION_KEY]``
    :return: The WbResponse containing the rendered locks page
    """
    lock_view = BaseInsertView(self.rewriterapp.jinja_env, 'locks.html')

    session = environ[SESSION_KEY]
    redis = session.redis

    sessions_by_id = {
        list_key.split(':')[1]: [member[5:] for member in redis.smembers(list_key)]
        for list_key in redis.scan_iter(SESH_LIST.format('*'))
    }

    html = lock_view.render_to_string(environ,
                                      current=session.sid,
                                      sessions=sessions_by_id)

    return WbResponse.text_response(html,
                                    content_type='text/html; charset="utf-8"')
def serve_home(self, environ):
    """Serve the home (/) view of pywb (not a collection).

    The index.html template is rendered with every route (fixed routes first,
    then dynamic ones) and the cached metadata of the dynamic routes.

    :param dict environ: The WSGI environment dictionary for the request
    :return: The WbResponse for serving the home (/) path
    :rtype: WbResponse
    """
    index_view = BaseInsertView(self.rewriterapp.jinja_env, 'index.html')

    fixed = self.warcserver.list_fixed_routes()
    dynamic = self.warcserver.list_dynamic_routes()

    template_args = {
        'routes': fixed + dynamic,
        'all_metadata': self.metadata_cache.get_all(dynamic),
    }
    html = index_view.render_to_string(environ, **template_args)

    return WbResponse.text_response(html,
                                    content_type='text/html; charset="utf-8"')
def serve_home(self, environ):
    """Serve the home (/) view of pywb (not a collection).

    Renders index.html with the combined list of fixed and dynamic routes and
    the metadata cached for the dynamic routes.

    :param dict environ: The WSGI environment dictionary for the request
    :return: The WbResponse for serving the home (/) path
    :rtype: WbResponse
    """
    view = BaseInsertView(self.rewriterapp.jinja_env, 'index.html')

    warcserver = self.warcserver
    fixed_routes = warcserver.list_fixed_routes()
    dynamic_routes = warcserver.list_dynamic_routes()

    every_route = fixed_routes + dynamic_routes
    route_metadata = self.metadata_cache.get_all(dynamic_routes)

    page = view.render_to_string(environ,
                                 routes=every_route,
                                 all_metadata=route_metadata)

    return WbResponse.text_response(page,
                                    content_type='text/html; charset="utf-8"')
def make_timemap(self, wb_url, res, full_prefix, output):
    """Build the timemap response for ``wb_url`` from an upstream result.

    ``wb_url.type`` is set to ``wb_url.QUERY``. An empty upstream body yields
    '404 Not Found'; otherwise the upstream status code and reason are
    forwarded. For a 200 result requested with ``output == 'link'`` the body
    is wrapped by ``MementoUtils.wrap_timemap_header``.

    :param wb_url: The URL object for the request (its ``type`` is modified)
    :param res: The upstream response; ``headers``, ``text``,
        ``status_code`` and ``reason`` are read
    :param full_prefix: Prefix forwarded to ``_get_timegate_timemap``
    :param str output: The requested output format
    :return: WbResponse carrying the timemap text
    """
    wb_url.type = wb_url.QUERY

    content_type = res.headers.get('Content-Type')
    text = res.text

    # Keep ``status`` bound even when there is a body but no usable status
    # code; previously that combination raised UnboundLocalError below.
    # '200 OK' is the status a text response gets when none is supplied.
    status = '200 OK'

    if not text:
        status = '404 Not Found'

    elif res.status_code:
        status = str(res.status_code) + ' ' + res.reason

        if res.status_code == 200 and output == 'link':
            timegate, timemap = self._get_timegate_timemap(wb_url.url,
                                                           full_prefix,
                                                           wb_url.mod)

            text = MementoUtils.wrap_timemap_header(wb_url.url,
                                                    timegate,
                                                    timemap,
                                                    text)

    return WbResponse.text_response(text,
                                    content_type=content_type,
                                    status=status)
def serve_cdx(self, environ, coll='$root'):
    """Run the upstream CDX query for a collection and stream back the result.

    :param dict environ: The WSGI environment dictionary for the request
    :param str coll: The name of the collection this CDX query is for
    :return: The WbResponse streaming the upstream CDX result, or a
        '400 Bad Request' text response carrying the error message if
        anything raises while making the request
    """
    #if coll == self.all_coll:
    #    coll = '*'

    cdx_url = self.rewriterapp.paths['cdx-server'].format(coll=coll)

    query_string = environ.get('QUERY_STRING')
    if query_string:
        # Extend an existing query string with '&', otherwise start one.
        if '?' in cdx_url:
            cdx_url += '&'
        else:
            cdx_url += '?'
        cdx_url += query_string

    try:
        upstream = requests.get(cdx_url, stream=True)
        upstream_type = upstream.headers.get('Content-Type')
        return WbResponse.bin_stream(StreamIter(upstream.raw),
                                     content_type=upstream_type)
    except Exception as exc:
        return WbResponse.text_response('Error: ' + str(exc),
                                        status='400 Bad Request')