def _test_route_req(route, env, abs_path=False):
    """
    Test helper: parse a request for a single route and pretty-print it.

    Asks ``route`` whether it handles ``env['REL_REQUEST_URI']``; when it
    does not (falsy matcher) the helper returns ``None`` without printing.
    Otherwise a one-route ``ArchivalRouter`` parses the request and the
    ``request_uri``, ``wb_prefix``, ``wb_url`` and ``coll`` attributes of the
    parsed request are pretty-printed as a dict.
    """
    request_uri = env['REL_REQUEST_URI']

    matcher, coll = route.is_handling(request_uri)
    if not matcher:
        return

    router = ArchivalRouter([route], abs_path=abs_path)
    parsed = router.parse_request(route, env, matcher, coll,
                                  request_uri, abs_path)

    # Only these attributes of the parsed request are shown.
    attrs = vars(parsed)
    shown_fields = ('request_uri', 'wb_prefix', 'wb_url', 'coll')
    pprint.pprint({field: attrs[field] for field in shown_fields})
def create_cdx_server_app(passed_config):
    """
    Create a cdx server api-only app.

    For each collection, create a /<coll>-cdx access point
    which follows the cdx api.

    ``passed_config`` is chained with the defaults loaded from
    ``DEFAULT_CONFIG`` (presumably passed values win -- confirm against
    ``DictChain``). Returns an ``ArchivalRouter`` over the cdx routes.
    """
    default_config = load_yaml_config(DEFAULT_CONFIG)
    config = DictChain(passed_config, default_config)

    collections = config.get('collections', {})

    # collections based on file system
    if config.get('enable_auto_colls', True):
        static_routes = {}
        loader_cls = config.get('colls_loader_cls', DirectoryCollsLoader)
        loader = loader_cls(config, static_routes, collections)
        # Called for its side effects only; the return value is discarded.
        # NOTE(review): presumably fills in `collections` -- confirm.
        loader()

    cdx_routes = []
    for coll_name, coll_config in six.iteritems(collections):
        route_config = init_route_config(coll_config, config)
        handler = init_collection(route_config)
        api_suffix = route_config.get('enable_cdx_api', True)
        add_cdx_api_handler(coll_name, api_suffix, cdx_routes, handler)

    return ArchivalRouter(cdx_routes)
def create_live_rewriter_app(config=None):
    """
    Create a live-rewriting app with two routes.

    ``rewrite`` is served by a ``RewriteHandler`` built from ``config``, and
    ``static/__pywb`` by a ``StaticHandler`` rooted at ``pywb/static/``.

    :param config: handler configuration mapping; when omitted (or ``None``)
        a fresh empty dict is used for each call.
    :return: an ``ArchivalRouter`` with ``hostpaths`` set to
        ``['http://localhost:8080']``.
    """
    # A `{}` default would be one dict shared by every call (and by every
    # RewriteHandler built from it), so create a new one per call instead.
    if config is None:
        config = {}

    routes = [
        Route('rewrite', RewriteHandler(config)),
        Route('static/__pywb', StaticHandler('pywb/static/')),
    ]

    return ArchivalRouter(routes, hostpaths=['http://localhost:8080'])
def create_perms_checker_app(config):
    """
    Create permissions checker standalone app.

    The app runs under the '/check-access' route. Reads ``port``,
    ``perms_policy`` and ``surt_ordered`` (default ``True``) from ``config``
    and returns an ``ArchivalRouter`` with the single permissions route.
    """
    port = config.get('port')
    policy = config.get('perms_policy')
    surt_ordered = config.get('surt_ordered', True)

    perms_handler = PermsHandler(policy, UrlCanonicalizer(surt_ordered))

    return ArchivalRouter([Route('check-access', perms_handler)], port=port)
def create_cdx_server_app(passed_config):
    """
    Create a cdx server api-only app.

    For each collection, create a /<coll>-cdx access point
    which follows the cdx api.

    :param passed_config: configuration mapping, chained with ``DEFAULTS``
        through ``DictChain``.
    :return: an ``ArchivalRouter`` over the cdx routes; it has no routes
        when the configuration defines no collections.
    """
    config = DictChain(passed_config, DEFAULTS)

    # A missing 'collections' entry means "no collections", not a crash on
    # None.
    collections = config.get('collections') or {}

    routes = []

    # dict.iteritems() is Python 2 only; items() works on both 2 and 3.
    for name, value in collections.items():
        route_config = init_route_config(value, config)
        query_handler = init_collection(route_config)

        cdx_api_suffix = route_config.get('enable_cdx_api', True)

        add_cdx_api_handler(name, cdx_api_suffix, routes, query_handler)

    return ArchivalRouter(routes)
def _test_redir(match_host, request_uri, referrer, script_name='', coll='coll'):
    """
    Test helper: run ``ReferRedirect`` against a one-route router.

    Builds an environ dict from the arguments (``HTTP_HOST`` is the netloc
    of ``match_host``), and returns the ``Location`` header of the redirect
    response, or ``False`` when no response is produced.
    """
    env = {
        'REL_REQUEST_URI': request_uri,
        'HTTP_REFERER': referrer,
        'SCRIPT_NAME': script_name,
        'HTTP_HOST': urlsplit(match_host).netloc,
    }

    router = ArchivalRouter([Route(coll, WbUrlHandler())])
    redirector = ReferRedirect()

    response = redirector(env, router)
    if response:
        return response.status_headers.get_header('Location')

    return False
def __call__(self, env):
    """
    Handle a request arriving in proxy mode.

    :param env: request environ dict; keys such as ``REQUEST_METHOD`` and
        ``REL_REQUEST_URI`` are read, and several ``pywb.proxy_*`` keys
        are written.
    :return: a response object, or ``None`` when this proxy does not
        handle the request: a non-CONNECT request whose url is not
        absolute ``http://``/``https://``, a request to the magic host
        that is not a cert install, or a handler that produced no
        response.
    """
    is_https = (env['REQUEST_METHOD'] == 'CONNECT')

    ArchivalRouter.ensure_rel_uri_set(env)

    # for non-https requests, check non-proxy urls
    if not is_https:
        url = env['REL_REQUEST_URI']

        if not url.startswith(('http://', 'https://')):
            return None

        env['pywb.proxy_scheme'] = 'http'

    route = None
    coll = None
    matcher = None
    response = None
    ts = None

    # check resolver, for pre connect resolve
    if self.resolver.pre_connect:
        route, coll, matcher, ts, response = self.resolver.resolve(env)
        if response:
            return response

    # do connect, then get updated url
    if is_https:
        # NOTE(review): the 'pywb.proxy_*' keys read below are not set in
        # this branch; presumably handle_connect() sets them -- confirm.
        response = self.handle_connect(env)
        if response:
            return response

        url = env['REL_REQUEST_URI']
    else:
        parts = urlparse.urlsplit(env['REL_REQUEST_URI'])

        # netloc is "host" or "host:port"
        hostport = parts.netloc.split(':', 1)
        env['pywb.proxy_host'] = hostport[0]
        env['pywb.proxy_port'] = hostport[1] if len(hostport) == 2 else ''
        env['pywb.proxy_req_uri'] = parts.path
        if parts.query:
            env['pywb.proxy_req_uri'] += '?' + parts.query
            env['pywb.proxy_query'] = parts.query

    env['pywb_proxy_magic'] = self.magic_name

    # route (static) and other resources to archival replay
    if env['pywb.proxy_host'] == self.magic_name:
        env['REL_REQUEST_URI'] = env['pywb.proxy_req_uri']

        # special case for proxy install
        response = self.handle_cert_install(env)
        if response:
            return response

        return None

    # check resolver, post connect
    if not self.resolver.pre_connect:
        route, coll, matcher, ts, response = self.resolver.resolve(env)
        if response:
            return response

    host_prefix = env['pywb.proxy_scheme'] + '://' + self.magic_name
    rel_prefix = ''

    # special case for proxy calendar
    if env['pywb.proxy_host'] == 'query.' + self.magic_name:
        # drop the leading '/' of the request uri
        url = env['pywb.proxy_req_uri'][1:]
        rel_prefix = '/'

    if ts is not None:
        url = ts + '/' + url

    wbrequest = route.request_class(
        env,
        request_uri=url,
        wb_url_str=url,
        coll=coll,
        host_prefix=host_prefix,
        rel_prefix=rel_prefix,
        wburl_class=route.handler.get_wburl_type(),
        urlrewriter_class=HttpsUrlRewriter,
        use_abs_prefix=False,
        is_proxy=True)

    if matcher:
        route.apply_filters(wbrequest, matcher)

    if self.use_wombat and self.use_banner:
        # full rewrite and banner
        wbrequest.wb_url.mod = ''
    elif self.use_banner:
        # banner only, no rewrite
        wbrequest.wb_url.mod = 'bn_'
    else:
        # unaltered, no rewrite or banner
        wbrequest.wb_url.mod = 'id_'

    response = route.handler(wbrequest)

    # The handler may produce no response; without this guard the header
    # update below would fail on `response.status_headers`.
    if not response:
        return None

    # add extra headers for replay responses
    if wbrequest.wb_url and wbrequest.wb_url.is_replay():
        response.status_headers.replace_headers(self.extra_headers)

    return response
def __call__(self, env):
    """
    Handle a request arriving in proxy mode.

    :param env: request environ dict; keys such as ``REQUEST_METHOD``,
        ``REL_REQUEST_URI``, ``HTTP_PYWB_REWRITE_PREFIX`` and
        ``SERVER_PROTOCOL`` are read, and several ``pywb.proxy_*`` keys
        are written.
    :return: a response object, or ``None`` when this proxy does not
        handle the request: a non-CONNECT request whose url is not
        absolute ``http://``/``https://``, a request to the magic host
        that is not a cert install, or a handler that produced no
        response. A response without a positive ``content-length``
        header is chunk-encoded under HTTP/1.1 and buffered otherwise.
    """
    is_https = (env['REQUEST_METHOD'] == 'CONNECT')

    ArchivalRouter.ensure_rel_uri_set(env)

    # for non-https requests, check non-proxy urls
    if not is_https:
        url = env['REL_REQUEST_URI']

        if not url.startswith(('http://', 'https://')):
            return None

        env['pywb.proxy_scheme'] = 'http'

    route = None
    coll = None
    matcher = None
    response = None
    ts = None

    # check resolver, for pre connect resolve
    if self.resolver.pre_connect:
        route, coll, matcher, ts, response = self.resolver.resolve(env)
        if response:
            return response

    # do connect, then get updated url
    if is_https:
        # NOTE(review): the 'pywb.proxy_*' keys read below are not set in
        # this branch; presumably handle_connect() sets them -- confirm.
        response = self.handle_connect(env)
        if response:
            return response

        url = env['REL_REQUEST_URI']
    else:
        parts = urlsplit(env['REL_REQUEST_URI'])

        # netloc is "host" or "host:port"
        hostport = parts.netloc.split(':', 1)
        env['pywb.proxy_host'] = hostport[0]
        env['pywb.proxy_port'] = hostport[1] if len(hostport) == 2 else ''
        env['pywb.proxy_req_uri'] = parts.path
        if parts.query:
            env['pywb.proxy_req_uri'] += '?' + parts.query
            env['pywb.proxy_query'] = parts.query

    if self.resolver.supports_switching:
        env['pywb_proxy_magic'] = self.magic_name

    # route (static) and other resources to archival replay
    if env['pywb.proxy_host'] == self.magic_name:
        env['REL_REQUEST_URI'] = env['pywb.proxy_req_uri']

        # special case for proxy install
        response = self.handle_cert_install(env)
        if response:
            return response

        return None

    # check resolver, post connect
    if not self.resolver.pre_connect:
        route, coll, matcher, ts, response = self.resolver.resolve(env)
        if response:
            return response

    rel_prefix = ''

    custom_prefix = env.get('HTTP_PYWB_REWRITE_PREFIX', '')
    if custom_prefix:
        host_prefix = custom_prefix
        urlrewriter_class = UrlRewriter
        abs_prefix = True
        # always rewrite to absolute here
        rewrite_opts = dict(no_match_rel=True)
    else:
        host_prefix = env['pywb.proxy_scheme'] + '://' + self.magic_name
        urlrewriter_class = SchemeOnlyUrlRewriter
        abs_prefix = False
        rewrite_opts = {}

    # special case for proxy calendar
    if env['pywb.proxy_host'] == 'query.' + self.magic_name:
        # drop the leading '/' of the request uri
        url = env['pywb.proxy_req_uri'][1:]
        rel_prefix = '/'

    if ts is not None:
        url = ts + '/' + url

    wbrequest = route.request_class(
        env,
        request_uri=url,
        wb_url_str=url,
        coll=coll,
        host_prefix=host_prefix,
        rel_prefix=rel_prefix,
        wburl_class=route.handler.get_wburl_type(),
        urlrewriter_class=urlrewriter_class,
        use_abs_prefix=abs_prefix,
        rewrite_opts=rewrite_opts,
        is_proxy=True)

    if matcher:
        route.apply_filters(wbrequest, matcher)

    if self.use_wombat and self.use_banner:
        # full rewrite and banner
        wbrequest.wb_url.mod = ''
    elif self.use_banner:
        # banner only, no rewrite
        wbrequest.wb_url.mod = 'bn_'
    else:
        # unaltered, no rewrite or banner
        wbrequest.wb_url.mod = 'uo_'

    response = route.handler(wbrequest)
    if not response:
        return None

    # add extra headers for replay responses
    if wbrequest.wb_url and wbrequest.wb_url.is_replay():
        response.status_headers.replace_headers(self.extra_headers)

    # check for content-length
    res = response.status_headers.get_header('content-length')
    try:
        content_length = int(res)
    except (TypeError, ValueError):
        # header missing (None) or not a number: treat as unknown length
        content_length = 0

    if content_length > 0:
        return response

    # need to either chunk or buffer to get content-length
    if env.get('SERVER_PROTOCOL') == 'HTTP/1.1':
        response.status_headers.remove_header('content-length')
        response.status_headers.headers.append(
            ('Transfer-Encoding', 'chunked'))
        response.body = self._chunk_encode(response.body)
    else:
        response.body = self._buffer_response(response.status_headers,
                                              response.body)

    return response
def __call__(self, env):
    """
    Handle a request arriving in proxy mode.

    :param env: request environ dict; keys such as ``REQUEST_METHOD``,
        ``REL_REQUEST_URI``, ``HTTP_PYWB_REWRITE_PREFIX`` and
        ``SERVER_PROTOCOL`` are read, and several ``pywb.proxy_*`` keys
        are written.
    :return: a response object, or ``None`` when this proxy does not
        handle the request: a non-CONNECT request whose url is not
        absolute ``http://``/``https://``, a request to the magic host
        that is not a cert install, or a handler that produced no
        response. A response without a positive ``content-length``
        header is chunk-encoded under HTTP/1.1 and buffered otherwise.
    """
    is_https = (env['REQUEST_METHOD'] == 'CONNECT')

    ArchivalRouter.ensure_rel_uri_set(env)

    # for non-https requests, check non-proxy urls
    if not is_https:
        url = env['REL_REQUEST_URI']

        if not url.startswith(('http://', 'https://')):
            return None

        env['pywb.proxy_scheme'] = 'http'

    route = None
    coll = None
    matcher = None
    response = None
    ts = None

    # check resolver, for pre connect resolve
    if self.resolver.pre_connect:
        route, coll, matcher, ts, response = self.resolver.resolve(env)
        if response:
            return response

    # do connect, then get updated url
    if is_https:
        # NOTE(review): the 'pywb.proxy_*' keys read below are not set in
        # this branch; presumably handle_connect() sets them -- confirm.
        response = self.handle_connect(env)
        if response:
            return response

        url = env['REL_REQUEST_URI']
    else:
        parts = urlsplit(env['REL_REQUEST_URI'])

        # netloc is "host" or "host:port"
        hostport = parts.netloc.split(':', 1)
        env['pywb.proxy_host'] = hostport[0]
        env['pywb.proxy_port'] = hostport[1] if len(hostport) == 2 else ''
        env['pywb.proxy_req_uri'] = parts.path
        if parts.query:
            env['pywb.proxy_req_uri'] += '?' + parts.query
            env['pywb.proxy_query'] = parts.query

    if self.resolver.supports_switching:
        env['pywb_proxy_magic'] = self.magic_name

    # route (static) and other resources to archival replay
    if env['pywb.proxy_host'] == self.magic_name:
        env['REL_REQUEST_URI'] = env['pywb.proxy_req_uri']

        # special case for proxy install
        response = self.handle_cert_install(env)
        if response:
            return response

        return None

    # check resolver, post connect
    if not self.resolver.pre_connect:
        route, coll, matcher, ts, response = self.resolver.resolve(env)
        if response:
            return response

    rel_prefix = ''

    custom_prefix = env.get('HTTP_PYWB_REWRITE_PREFIX', '')
    if custom_prefix:
        host_prefix = custom_prefix
        urlrewriter_class = UrlRewriter
        abs_prefix = True
        # always rewrite to absolute here
        rewrite_opts = dict(no_match_rel=True)
    else:
        host_prefix = env['pywb.proxy_scheme'] + '://' + self.magic_name
        urlrewriter_class = SchemeOnlyUrlRewriter
        abs_prefix = False
        rewrite_opts = {}

    # special case for proxy calendar
    if env['pywb.proxy_host'] == 'query.' + self.magic_name:
        # drop the leading '/' of the request uri
        url = env['pywb.proxy_req_uri'][1:]
        rel_prefix = '/'

    if ts is not None:
        url = ts + '/' + url

    wbrequest = route.request_class(
        env,
        request_uri=url,
        wb_url_str=url,
        coll=coll,
        host_prefix=host_prefix,
        rel_prefix=rel_prefix,
        wburl_class=route.handler.get_wburl_type(),
        urlrewriter_class=urlrewriter_class,
        use_abs_prefix=abs_prefix,
        rewrite_opts=rewrite_opts,
        is_proxy=True)

    if matcher:
        route.apply_filters(wbrequest, matcher)

    if self.use_wombat and self.use_banner:
        # full rewrite and banner
        wbrequest.wb_url.mod = ''
    elif self.use_banner:
        # banner only, no rewrite
        wbrequest.wb_url.mod = 'bn_'
    else:
        # unaltered, no rewrite or banner
        wbrequest.wb_url.mod = 'uo_'

    response = route.handler(wbrequest)
    if not response:
        return None

    # add extra headers for replay responses
    if wbrequest.wb_url and wbrequest.wb_url.is_replay():
        response.status_headers.replace_headers(self.extra_headers)

    # check for content-length
    res = response.status_headers.get_header('content-length')
    try:
        content_length = int(res)
    except (TypeError, ValueError):
        # header missing (None) or not a number: treat as unknown length
        content_length = 0

    if content_length > 0:
        return response

    # need to either chunk or buffer to get content-length
    if env.get('SERVER_PROTOCOL') == 'HTTP/1.1':
        response.status_headers.remove_header('content-length')
        response.status_headers.headers.append(
            ('Transfer-Encoding', 'chunked'))
        response.body = self._chunk_encode(response.body)
    else:
        response.body = self._buffer_response(response.status_headers,
                                              response.body)

    return response