示例#1
0
def _test_route_req(route, env, abs_path=False):
    matcher, coll = route.is_handling(env['REL_REQUEST_URI'])
    if not matcher:
        return

    the_router = ArchivalRouter([route], abs_path=abs_path)
    req = the_router.parse_request(route, env, matcher, coll, env['REL_REQUEST_URI'], abs_path)

    varlist = vars(req)
    the_dict = dict((k, varlist[k]) for k in ('request_uri', 'wb_prefix', 'wb_url', 'coll'))
    pprint.pprint(the_dict)
示例#2
0
def _test_route_req(route, env, abs_path=False):
    matcher, coll = route.is_handling(env['REL_REQUEST_URI'])
    if not matcher:
        return

    the_router = ArchivalRouter([route], abs_path=abs_path)
    req = the_router.parse_request(route, env, matcher, coll,
                                   env['REL_REQUEST_URI'], abs_path)

    varlist = vars(req)
    the_dict = dict((k, varlist[k])
                    for k in ('request_uri', 'wb_prefix', 'wb_url', 'coll'))
    pprint.pprint(the_dict)
示例#3
0
def create_cdx_server_app(passed_config):
    """
    Create a cdx server api-only app
    For each collection, create a /<coll>-cdx access point
    which follows the cdx api
    """

    defaults = load_yaml_config(DEFAULT_CONFIG)

    config = DictChain(passed_config, defaults)

    collections = config.get('collections', {})

    static_routes = {}

    # collections based on file system
    if config.get('enable_auto_colls', True):
        colls_loader_cls = config.get('colls_loader_cls', DirectoryCollsLoader)
        dir_loader = colls_loader_cls(config, static_routes, collections)
        dir_loader()
        #collections.update(dir_loader())

    routes = []

    for name, value in six.iteritems(collections):
        route_config = init_route_config(value, config)
        query_handler = init_collection(route_config)

        cdx_api_suffix = route_config.get('enable_cdx_api', True)

        add_cdx_api_handler(name, cdx_api_suffix, routes, query_handler)

    return ArchivalRouter(routes)
示例#4
0
def create_live_rewriter_app(config={}):
    routes = [
        Route('rewrite', RewriteHandler(config)),
        Route('static/__pywb', StaticHandler('pywb/static/'))
    ]

    return ArchivalRouter(routes, hostpaths=['http://localhost:8080'])
示例#5
0
def create_perms_checker_app(config):
    """
    Create permissions checker standalone app
    Running under the '/check-access' route
    """
    port = config.get('port')

    perms_policy = config.get('perms_policy')

    canonicalizer = UrlCanonicalizer(config.get('surt_ordered', True))

    handler = PermsHandler(perms_policy, canonicalizer)
    routes = [Route('check-access', handler)]

    return ArchivalRouter(routes, port=port)
示例#6
0
def create_cdx_server_app(passed_config):
    """
    Create a cdx server api-only app
    For each collection, create a /<coll>-cdx access point
    which follows the cdx api
    """
    config = DictChain(passed_config, DEFAULTS)

    collections = config.get('collections')

    routes = []

    for name, value in collections.iteritems():
        route_config = init_route_config(value, config)
        query_handler = init_collection(route_config)

        cdx_api_suffix = route_config.get('enable_cdx_api', True)

        add_cdx_api_handler(name, cdx_api_suffix, routes, query_handler)

    return ArchivalRouter(routes)
示例#7
0
def _test_redir(match_host,
                request_uri,
                referrer,
                script_name='',
                coll='coll'):
    env = {
        'REL_REQUEST_URI': request_uri,
        'HTTP_REFERER': referrer,
        'SCRIPT_NAME': script_name
    }

    env['HTTP_HOST'] = urlsplit(match_host).netloc

    routes = [Route(coll, WbUrlHandler())]

    the_router = ArchivalRouter(routes)

    redir = ReferRedirect()
    #req = WbRequest.from_uri(request_uri, env)
    rep = redir(env, the_router)
    if not rep:
        return False

    return rep.status_headers.get_header('Location')
示例#8
0
    def __call__(self, env):
        is_https = (env['REQUEST_METHOD'] == 'CONNECT')
        ArchivalRouter.ensure_rel_uri_set(env)

        # for non-https requests, check non-proxy urls
        if not is_https:
            url = env['REL_REQUEST_URI']

            if not url.startswith(('http://', 'https://')):
                return None

            env['pywb.proxy_scheme'] = 'http'

        route = None
        coll = None
        matcher = None
        response = None
        ts = None

        # check resolver, for pre connect resolve
        if self.resolver.pre_connect:
            route, coll, matcher, ts, response = self.resolver.resolve(env)
            if response:
                return response

        # do connect, then get updated url
        if is_https:
            response = self.handle_connect(env)
            if response:
                return response

            url = env['REL_REQUEST_URI']
        else:
            parts = urlparse.urlsplit(env['REL_REQUEST_URI'])
            hostport = parts.netloc.split(':', 1)
            env['pywb.proxy_host'] = hostport[0]
            env['pywb.proxy_port'] = hostport[1] if len(hostport) == 2 else ''
            env['pywb.proxy_req_uri'] = parts.path
            if parts.query:
                env['pywb.proxy_req_uri'] += '?' + parts.query
                env['pywb.proxy_query'] = parts.query

        env['pywb_proxy_magic'] = self.magic_name

        # route (static) and other resources to archival replay
        if env['pywb.proxy_host'] == self.magic_name:
            env['REL_REQUEST_URI'] = env['pywb.proxy_req_uri']

            # special case for proxy install
            response = self.handle_cert_install(env)
            if response:
                return response

            return None

        # check resolver, post connect
        if not self.resolver.pre_connect:
            route, coll, matcher, ts, response = self.resolver.resolve(env)
            if response:
                return response

        host_prefix = env['pywb.proxy_scheme'] + '://' + self.magic_name
        rel_prefix = ''

        # special case for proxy calendar
        if (env['pywb.proxy_host'] == 'query.' + self.magic_name):
            url = env['pywb.proxy_req_uri'][1:]
            rel_prefix = '/'

        if ts is not None:
            url = ts + '/' + url

        wbrequest = route.request_class(env,
                              request_uri=url,
                              wb_url_str=url,
                              coll=coll,
                              host_prefix=host_prefix,
                              rel_prefix=rel_prefix,
                              wburl_class=route.handler.get_wburl_type(),
                              urlrewriter_class=HttpsUrlRewriter,
                              use_abs_prefix=False,
                              is_proxy=True)

        if matcher:
            route.apply_filters(wbrequest, matcher)

        # full rewrite and banner
        if self.use_wombat and self.use_banner:
            wbrequest.wb_url.mod = ''
        elif self.use_banner:
        # banner only, no rewrite
            wbrequest.wb_url.mod = 'bn_'
        else:
        # unaltered, no rewrite or banner
            wbrequest.wb_url.mod = 'id_'

        response = route.handler(wbrequest)

        if wbrequest.wb_url and wbrequest.wb_url.is_replay():
            response.status_headers.replace_headers(self.extra_headers)

        return response
示例#9
0
文件: proxy.py 项目: chdorner/pywb
    def __call__(self, env):
        is_https = (env['REQUEST_METHOD'] == 'CONNECT')
        ArchivalRouter.ensure_rel_uri_set(env)

        # for non-https requests, check non-proxy urls
        if not is_https:
            url = env['REL_REQUEST_URI']

            if not url.startswith(('http://', 'https://')):
                return None

            env['pywb.proxy_scheme'] = 'http'

        route = None
        coll = None
        matcher = None
        response = None
        ts = None

        # check resolver, for pre connect resolve
        if self.resolver.pre_connect:
            route, coll, matcher, ts, response = self.resolver.resolve(env)
            if response:
                return response

        # do connect, then get updated url
        if is_https:
            response = self.handle_connect(env)
            if response:
                return response

            url = env['REL_REQUEST_URI']
        else:
            parts = urlsplit(env['REL_REQUEST_URI'])
            hostport = parts.netloc.split(':', 1)
            env['pywb.proxy_host'] = hostport[0]
            env['pywb.proxy_port'] = hostport[1] if len(hostport) == 2 else ''
            env['pywb.proxy_req_uri'] = parts.path
            if parts.query:
                env['pywb.proxy_req_uri'] += '?' + parts.query
                env['pywb.proxy_query'] = parts.query

        if self.resolver.supports_switching:
            env['pywb_proxy_magic'] = self.magic_name

        # route (static) and other resources to archival replay
        if env['pywb.proxy_host'] == self.magic_name:
            env['REL_REQUEST_URI'] = env['pywb.proxy_req_uri']

            # special case for proxy install
            response = self.handle_cert_install(env)
            if response:
                return response

            return None

        # check resolver, post connect
        if not self.resolver.pre_connect:
            route, coll, matcher, ts, response = self.resolver.resolve(env)
            if response:
                return response

        rel_prefix = ''

        custom_prefix = env.get('HTTP_PYWB_REWRITE_PREFIX', '')
        if custom_prefix:
            host_prefix = custom_prefix
            urlrewriter_class = UrlRewriter
            abs_prefix = True
            # always rewrite to absolute here
            rewrite_opts = dict(no_match_rel=True)
        else:
            host_prefix = env['pywb.proxy_scheme'] + '://' + self.magic_name
            urlrewriter_class = SchemeOnlyUrlRewriter
            abs_prefix = False
            rewrite_opts = {}

        # special case for proxy calendar
        if (env['pywb.proxy_host'] == 'query.' + self.magic_name):
            url = env['pywb.proxy_req_uri'][1:]
            rel_prefix = '/'

        if ts is not None:
            url = ts + '/' + url

        wbrequest = route.request_class(env,
                              request_uri=url,
                              wb_url_str=url,
                              coll=coll,
                              host_prefix=host_prefix,
                              rel_prefix=rel_prefix,
                              wburl_class=route.handler.get_wburl_type(),
                              urlrewriter_class=urlrewriter_class,
                              use_abs_prefix=abs_prefix,
                              rewrite_opts=rewrite_opts,
                              is_proxy=True)

        if matcher:
            route.apply_filters(wbrequest, matcher)

        # full rewrite and banner
        if self.use_wombat and self.use_banner:
            wbrequest.wb_url.mod = ''
        elif self.use_banner:
        # banner only, no rewrite
            wbrequest.wb_url.mod = 'bn_'
        else:
        # unaltered, no rewrite or banner
            wbrequest.wb_url.mod = 'uo_'

        response = route.handler(wbrequest)
        if not response:
            return None

        # add extra headers for replay responses
        if wbrequest.wb_url and wbrequest.wb_url.is_replay():
            response.status_headers.replace_headers(self.extra_headers)

        # check for content-length
        res = response.status_headers.get_header('content-length')
        try:
            if int(res) > 0:
                return response
        except:
            pass

        # need to either chunk or buffer to get content-length
        if env.get('SERVER_PROTOCOL') == 'HTTP/1.1':
            response.status_headers.remove_header('content-length')
            response.status_headers.headers.append(('Transfer-Encoding', 'chunked'))
            response.body = self._chunk_encode(response.body)
        else:
            response.body = self._buffer_response(response.status_headers,
                                                  response.body)

        return response
示例#10
0
    def __call__(self, env):
        is_https = (env['REQUEST_METHOD'] == 'CONNECT')
        ArchivalRouter.ensure_rel_uri_set(env)

        # for non-https requests, check non-proxy urls
        if not is_https:
            url = env['REL_REQUEST_URI']

            if not url.startswith(('http://', 'https://')):
                return None

            env['pywb.proxy_scheme'] = 'http'

        route = None
        coll = None
        matcher = None
        response = None
        ts = None

        # check resolver, for pre connect resolve
        if self.resolver.pre_connect:
            route, coll, matcher, ts, response = self.resolver.resolve(env)
            if response:
                return response

        # do connect, then get updated url
        if is_https:
            response = self.handle_connect(env)
            if response:
                return response

            url = env['REL_REQUEST_URI']
        else:
            parts = urlsplit(env['REL_REQUEST_URI'])
            hostport = parts.netloc.split(':', 1)
            env['pywb.proxy_host'] = hostport[0]
            env['pywb.proxy_port'] = hostport[1] if len(hostport) == 2 else ''
            env['pywb.proxy_req_uri'] = parts.path
            if parts.query:
                env['pywb.proxy_req_uri'] += '?' + parts.query
                env['pywb.proxy_query'] = parts.query

        if self.resolver.supports_switching:
            env['pywb_proxy_magic'] = self.magic_name

        # route (static) and other resources to archival replay
        if env['pywb.proxy_host'] == self.magic_name:
            env['REL_REQUEST_URI'] = env['pywb.proxy_req_uri']

            # special case for proxy install
            response = self.handle_cert_install(env)
            if response:
                return response

            return None

        # check resolver, post connect
        if not self.resolver.pre_connect:
            route, coll, matcher, ts, response = self.resolver.resolve(env)
            if response:
                return response

        rel_prefix = ''

        custom_prefix = env.get('HTTP_PYWB_REWRITE_PREFIX', '')
        if custom_prefix:
            host_prefix = custom_prefix
            urlrewriter_class = UrlRewriter
            abs_prefix = True
            # always rewrite to absolute here
            rewrite_opts = dict(no_match_rel=True)
        else:
            host_prefix = env['pywb.proxy_scheme'] + '://' + self.magic_name
            urlrewriter_class = SchemeOnlyUrlRewriter
            abs_prefix = False
            rewrite_opts = {}

        # special case for proxy calendar
        if (env['pywb.proxy_host'] == 'query.' + self.magic_name):
            url = env['pywb.proxy_req_uri'][1:]
            rel_prefix = '/'

        if ts is not None:
            url = ts + '/' + url

        wbrequest = route.request_class(
            env,
            request_uri=url,
            wb_url_str=url,
            coll=coll,
            host_prefix=host_prefix,
            rel_prefix=rel_prefix,
            wburl_class=route.handler.get_wburl_type(),
            urlrewriter_class=urlrewriter_class,
            use_abs_prefix=abs_prefix,
            rewrite_opts=rewrite_opts,
            is_proxy=True)

        if matcher:
            route.apply_filters(wbrequest, matcher)

        # full rewrite and banner
        if self.use_wombat and self.use_banner:
            wbrequest.wb_url.mod = ''
        elif self.use_banner:
            # banner only, no rewrite
            wbrequest.wb_url.mod = 'bn_'
        else:
            # unaltered, no rewrite or banner
            wbrequest.wb_url.mod = 'uo_'

        response = route.handler(wbrequest)
        if not response:
            return None

        # add extra headers for replay responses
        if wbrequest.wb_url and wbrequest.wb_url.is_replay():
            response.status_headers.replace_headers(self.extra_headers)

        # check for content-length
        res = response.status_headers.get_header('content-length')
        try:
            if int(res) > 0:
                return response
        except:
            pass

        # need to either chunk or buffer to get content-length
        if env.get('SERVER_PROTOCOL') == 'HTTP/1.1':
            response.status_headers.remove_header('content-length')
            response.status_headers.headers.append(
                ('Transfer-Encoding', 'chunked'))
            response.body = self._chunk_encode(response.body)
        else:
            response.body = self._buffer_response(response.status_headers,
                                                  response.body)

        return response