Exemplo n.º 1
0
    def serve_coll_page(self, environ, coll='$root'):
        """Render and serve a collections search page (search.html).

        :param dict environ: The WSGI environment dictionary for the request
        :param str coll: The name of the collection to serve the collections search page for
        :return: The WbResponse containing the collections search page
        :rtype: WbResponse
        """
        if not self.is_valid_coll(coll):
            self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll))

        self.setup_paths(environ, coll)

        metadata = self.get_metadata(coll)

        view = BaseInsertView(self.rewriterapp.jinja_env, 'search.html')

        wb_prefix = environ.get('SCRIPT_NAME')
        if wb_prefix:
            wb_prefix += '/'

        content = view.render_to_string(environ,
                                        wb_prefix=wb_prefix,
                                        metadata=metadata,
                                        coll=coll)

        return WbResponse.text_response(content, content_type='text/html; charset="utf-8"')
Exemplo n.º 2
0
    def proxy_fetch(self, env, url):
        """Proxy mode only endpoint that handles OPTIONS requests and COR fetches for Preservation Worker.

        Due to normal cross-origin browser restrictions in proxy mode, auto fetch worker cannot access the CSS rules
        of cross-origin style sheets and must re-fetch them in a manner that is CORS safe. This endpoint facilitates
        that by fetching the stylesheets for the auto fetch worker and then responds with its contents

        :param dict env: The WSGI environment dictionary
        :param str url:  The URL of the resource to be fetched
        :return: WbResponse that is either response to an Options request or the results of fetching url
        :rtype: WbResponse
        """
        if not self.is_proxy_enabled(env):
            # we are not in proxy mode so just respond with forbidden
            return WbResponse.text_response(
                'proxy mode must be enabled to use this endpoint',
                status='403 Forbidden')

        if env.get('REQUEST_METHOD') == 'OPTIONS':
            return WbResponse.options_response(env)

        # ensure full URL
        request_url = env['REQUEST_URI']
        # replace with /id_ so we do not get rewritten
        url = request_url.replace('/proxy-fetch', '/id_')
        # update WSGI environment object
        env['REQUEST_URI'] = self.proxy_coll + url
        env['PATH_INFO'] = env['PATH_INFO'].replace('/proxy-fetch',
                                                    self.proxy_coll + '/id_')
        # make request using normal serve_content
        response = self.serve_content(env, self.proxy_coll, url)
        # for WR
        if isinstance(response, WbResponse):
            response.add_access_control_headers(env=env)
        return response
Exemplo n.º 3
0
    def proxy_fetch(self, env, url):
        """Proxy mode only endpoint that handles OPTIONS requests and COR fetches for Preservation Worker.

        Due to normal cross-origin browser restrictions in proxy mode, auto fetch worker cannot access the CSS rules
        of cross-origin style sheets and must re-fetch them in a manner that is CORS safe. This endpoint facilitates
        that by fetching the stylesheets for the auto fetch worker and then responds with its contents

        :param dict env: The WSGI environment dictionary
        :param str url:  The URL of the resource to be fetched
        :return: WbResponse that is either response to an Options request or the results of fetching url
        :rtype: WbResponse
        """
        if not self.is_proxy_enabled(env):
            # we are not in proxy mode so just respond with forbidden
            return WbResponse.text_response('proxy mode must be enabled to use this endpoint',
                                            status='403 Forbidden')

        if env.get('REQUEST_METHOD') == 'OPTIONS':
            return WbResponse.options_response(env)

        # ensure full URL
        request_url = env['REQUEST_URI']
        # replace with /id_ so we do not get rewritten
        url = request_url.replace('/proxy-fetch', '/id_')
        # update WSGI environment object
        env['REQUEST_URI'] = self.proxy_coll + url
        env['PATH_INFO'] = env['PATH_INFO'].replace('/proxy-fetch', self.proxy_coll + '/id_')
        # make request using normal serve_content
        response = self.serve_content(env, self.proxy_coll, url)
        # for WR
        if isinstance(response, WbResponse):
            response.add_access_control_headers(env=env)
        return response
Exemplo n.º 4
0
    def serve_cdx(self, environ, coll='$root'):
        """Make the upstream CDX query for a collection and response with the results of the query

        :param dict environ: The WSGI environment dictionary for the request
        :param str coll: The name of the collection this CDX query is for
        :return: The WbResponse containing the results of the CDX query
        :rtype: WbResponse
        """
        base_url = self.rewriterapp.paths['cdx-server']

        #if coll == self.all_coll:
        #    coll = '*'

        cdx_url = base_url.format(coll=coll)

        if environ.get('QUERY_STRING'):
            cdx_url += '&' if '?' in cdx_url else '?'
            cdx_url += environ.get('QUERY_STRING')

        try:
            res = requests.get(cdx_url, stream=True)

            content_type = res.headers.get('Content-Type')

            return WbResponse.bin_stream(StreamIter(res.raw),
                                         content_type=content_type)

        except Exception as e:
            return WbResponse.text_response('Error: ' + str(e), status='400 Bad Request')
Exemplo n.º 5
0
    def serve_record(self, environ, coll='$root', url=''):
        if coll in self.warcserver.list_fixed_routes():
            return WbResponse.text_response(
                'Error: Can Not Record Into Custom Collection "{0}"'.format(
                    coll))

        return self.serve_content(environ, coll, url, record=True)
Exemplo n.º 6
0
    def format_response(self,
                        response,
                        wb_url,
                        full_prefix,
                        is_timegate,
                        is_proxy,
                        timegate_closest_ts=None):
        memento_ts = None
        if not isinstance(response, WbResponse):
            content_type = 'text/html'

            # if not replay outer frame, specify utf-8 charset
            if not self.is_framed_replay(wb_url):
                content_type += '; charset=utf-8'
            else:
                memento_ts = timegate_closest_ts or wb_url.timestamp

            response = WbResponse.text_response(response,
                                                content_type=content_type)

        if self.enable_memento and response.status_headers.statusline.startswith(
                '200'):
            self._add_memento_links(wb_url.url,
                                    full_prefix,
                                    None,
                                    memento_ts,
                                    response.status_headers,
                                    is_timegate,
                                    is_proxy,
                                    is_memento=not is_timegate)
        return response
Exemplo n.º 7
0
    def serve_coll_page(self, environ, coll='$root'):
        """Render and serve a collections search page (search.html).

        :param dict environ: The WSGI environment dictionary for the request
        :param str coll: The name of the collection to serve the collections search page for
        :return: The WbResponse containing the collections search page
        :rtype: WbResponse
        """
        if not self.is_valid_coll(coll):
            self.raise_not_found(environ, 'coll_not_found', coll)

        self.setup_paths(environ, coll)

        coll_config = self.get_coll_config(coll)
        metadata = coll_config.get('metadata')

        view = BaseInsertView(self.rewriterapp.jinja_env, 'search.html')

        wb_prefix = environ.get('SCRIPT_NAME', '')
        if wb_prefix:
            wb_prefix += '/'

        content = view.render_to_string(environ,
                                        wb_prefix=wb_prefix,
                                        coll=coll,
                                        coll_config=coll_config,
                                        metadata=metadata)

        return WbResponse.text_response(
            content, content_type='text/html; charset="utf-8"')
Exemplo n.º 8
0
    def serve_cdx(self, environ, coll='$root'):
        """Make the upstream CDX query for a collection and response with the results of the query

        :param dict environ: The WSGI environment dictionary for the request
        :param str coll: The name of the collection this CDX query is for
        :return: The WbResponse containing the results of the CDX query
        :rtype: WbResponse
        """
        base_url = self.rewriterapp.paths['cdx-server']

        # if coll == self.all_coll:
        #    coll = '*'

        cdx_url = base_url.format(coll=coll)

        if environ.get('QUERY_STRING'):
            cdx_url += '&' if '?' in cdx_url else '?'
            cdx_url += environ.get('QUERY_STRING')

        if self.query_limit:
            cdx_url += '&' if '?' in cdx_url else '?'
            cdx_url += 'limit=' + str(self.query_limit)

        try:
            res = requests.get(cdx_url, stream=True)

            content_type = res.headers.get('Content-Type')

            return WbResponse.bin_stream(StreamIter(res.raw),
                                         content_type=content_type)

        except Exception as e:
            return WbResponse.text_response('Error: ' + str(e),
                                            status='400 Bad Request')
Exemplo n.º 9
0
    def _not_found_response(self, environ, url):
        resp = self.not_found_view.render_to_string(environ,
                                                    url=url,
                                                    err_msg="Not Found")

        return WbResponse.text_response(resp,
                                        status='404 Not Found',
                                        content_type='text/html')
Exemplo n.º 10
0
    def _error_response(self, environ, wbe):
        status = wbe.status()

        resp = self.error_view.render_to_string(environ,
                                                err_msg=wbe.url,
                                                err_details=wbe.msg,
                                                err_status=wbe.status_code)

        return WbResponse.text_response(resp, status=status, content_type='text/html')
Exemplo n.º 11
0
    def _error_response(self,
                        environ,
                        msg='',
                        details='',
                        status='404 Not Found'):
        resp = self.error_view.render_to_string(environ,
                                                err_msg=msg,
                                                err_details=details)

        return WbResponse.text_response(resp,
                                        status=status,
                                        content_type='text/html')
Exemplo n.º 12
0
def test_resp_1():
    resp = vars(WbResponse.text_response('Test'))

    expected = {
        'body': [b'Test'],
        'status_headers':
        StatusAndHeaders(protocol='',
                         statusline='200 OK',
                         headers=[('Content-Type',
                                   'text/plain; charset=utf-8'),
                                  ('Content-Length', '4')])
    }

    assert (resp == expected)
Exemplo n.º 13
0
    def serve_record(self, environ, coll='$root', url=''):
        """Serve a URL's content from a WARC/ARC record in replay mode or from the live web in
        live, proxy, and record mode.

        :param dict environ: The WSGI environment dictionary for the request
        :param str coll: The name of the collection the record is to be served from
        :param str url: The URL for the corresponding record to be served if it exists
        :return: WbResponse containing the contents of the record/URL
        :rtype: WbResponse
        """
        if coll in self.warcserver.list_fixed_routes():
            return WbResponse.text_response('Error: Can Not Record Into Custom Collection "{0}"'.format(coll))

        return self.serve_content(environ, coll, url, record=True)
Exemplo n.º 14
0
    def serve_home(self, environ):
        home_view = BaseInsertView(self.rewriterapp.jinja_env, 'index.html')
        fixed_routes = self.warcserver.list_fixed_routes()
        dynamic_routes = self.warcserver.list_dynamic_routes()

        routes = fixed_routes + dynamic_routes

        all_metadata = self.metadata_cache.get_all(dynamic_routes)

        content = home_view.render_to_string(environ,
                                             routes=routes,
                                             all_metadata=all_metadata)

        return WbResponse.text_response(
            content, content_type='text/html; charset="utf-8"')
Exemplo n.º 15
0
def test_resp_4():
    resp = vars(WbResponse.text_response('Test').add_range(10, 4, 100))

    expected = {
        'body': [b'Test'],
        'status_headers':
        StatusAndHeaders(protocol='',
                         statusline='206 Partial Content',
                         headers=[('Content-Type',
                                   'text/plain; charset=utf-8'),
                                  ('Content-Length', '4'),
                                  ('Content-Range', 'bytes 10-13/100'),
                                  ('Accept-Ranges', 'bytes')])
    }

    assert (resp == expected)
Exemplo n.º 16
0
    def serve_record(self, environ, coll='$root', url=''):
        """Serve a URL's content from a WARC/ARC record in replay mode or from the live web in
        live, proxy, and record mode.

        :param dict environ: The WSGI environment dictionary for the request
        :param str coll: The name of the collection the record is to be served from
        :param str url: The URL for the corresponding record to be served if it exists
        :return: WbResponse containing the contents of the record/URL
        :rtype: WbResponse
        """
        if coll in self.warcserver.list_fixed_routes():
            return WbResponse.text_response(
                'Error: Can Not Record Into Custom Collection "{0}"'.format(
                    coll))

        return self.serve_content(environ, coll, url, record=True)
Exemplo n.º 17
0
    def serve_coll_page(self, environ, coll='$root'):
        if not self.is_valid_coll(coll):
            self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll))

        self.setup_paths(environ, coll)

        view = BaseInsertView(self.rewriterapp.jinja_env, 'fullsearch.html')

        wb_prefix = environ.get('SCRIPT_NAME', '')
        if wb_prefix:
            wb_prefix += '/'

        content = view.render_to_string(environ, wb_prefix=wb_prefix, coll=coll)

        return WbResponse.text_response(
            content, content_type='text/html; charset="utf-8"'
        )
Exemplo n.º 18
0
    def lock_listing(self, environ):
        lock_view = BaseInsertView(self.rewriterapp.jinja_env, 'locks.html')

        session = environ[SESSION_KEY]

        sessions = {}

        for sesh_key in session.redis.scan_iter(SESH_LIST.format('*')):
            sesh = sesh_key.split(':')[1]

            sessions[sesh] = [key[5:] for key in session.redis.smembers(sesh_key)]

        content = lock_view.render_to_string(environ,
                                             current=session.sid,
                                             sessions=sessions)

        return WbResponse.text_response(content, content_type='text/html; charset="utf-8"')
Exemplo n.º 19
0
    def serve_home(self, environ):
        """Serves the home (/) view of pywb (not a collections)

        :param dict environ: The WSGI environment dictionary for the request
        :return: The WbResponse for serving the home (/) path
        :rtype: WbResponse
        """
        home_view = BaseInsertView(self.rewriterapp.jinja_env, 'index.html')
        fixed_routes = self.warcserver.list_fixed_routes()
        dynamic_routes = self.warcserver.list_dynamic_routes()

        routes = fixed_routes + dynamic_routes

        all_metadata = self.metadata_cache.get_all(dynamic_routes)

        content = home_view.render_to_string(environ,
                                             routes=routes,
                                             all_metadata=all_metadata)

        return WbResponse.text_response(content, content_type='text/html; charset="utf-8"')
Exemplo n.º 20
0
    def serve_home(self, environ):
        """Serves the home (/) view of pywb (not a collections)

        :param dict environ: The WSGI environment dictionary for the request
        :return: The WbResponse for serving the home (/) path
        :rtype: WbResponse
        """
        home_view = BaseInsertView(self.rewriterapp.jinja_env, 'index.html')
        fixed_routes = self.warcserver.list_fixed_routes()
        dynamic_routes = self.warcserver.list_dynamic_routes()

        routes = fixed_routes + dynamic_routes

        all_metadata = self.metadata_cache.get_all(dynamic_routes)

        content = home_view.render_to_string(environ,
                                             routes=routes,
                                             all_metadata=all_metadata)

        return WbResponse.text_response(
            content, content_type='text/html; charset="utf-8"')
Exemplo n.º 21
0
    def make_timemap(self, wb_url, res, full_prefix, output):
        wb_url.type = wb_url.QUERY

        content_type = res.headers.get('Content-Type')
        text = res.text

        if not res.text:
            status = '404 Not Found'

        elif res.status_code:
            status = str(res.status_code) + ' ' + res.reason

            if res.status_code == 200 and output == 'link':
                timegate, timemap = self._get_timegate_timemap(
                    wb_url.url, full_prefix, wb_url.mod)

                text = MementoUtils.wrap_timemap_header(
                    wb_url.url, timegate, timemap, res.text)
        return WbResponse.text_response(text,
                                        content_type=content_type,
                                        status=status)
Exemplo n.º 22
0
    def serve_cdx(self, environ, coll='$root'):
        base_url = self.rewriterapp.paths['cdx-server']

        #if coll == self.all_coll:
        #    coll = '*'

        cdx_url = base_url.format(coll=coll)

        if environ.get('QUERY_STRING'):
            cdx_url += '&' if '?' in cdx_url else '?'
            cdx_url += environ.get('QUERY_STRING')

        try:
            res = requests.get(cdx_url, stream=True)

            content_type = res.headers.get('Content-Type')

            return WbResponse.bin_stream(StreamIter(res.raw),
                                         content_type=content_type)

        except Exception as e:
            return WbResponse.text_response('Error: ' + str(e),
                                            status='400 Bad Request')