Exemplo n.º 1
0
    def _check_accept_dt(self, wb_url, environ):
        """Returns T/F indicating if the supplied WbUrl instance
        is for a timegate request

        :param WbUrl wb_url: The URL to be checked
        :param dict environ: The wsgi environment object for the request
        :return: T/F indicating if the WbUrl is for timegate request
        :rtype: bool
        """
        is_timegate = False
        if wb_url.is_latest_replay():
            accept_dt = environ.get('HTTP_ACCEPT_DATETIME')
            is_timegate = True
            if accept_dt:
                try:
                    wb_url.timestamp = http_date_to_timestamp(accept_dt)
                except Exception:
                    raise UpstreamException(400,
                                            url=wb_url.url,
                                            details='Invalid Accept-Datetime')
                    # return WbResponse.text_response('Invalid Accept-Datetime', status='400 Bad Request')

                wb_url.type = wb_url.REPLAY

            elif 'pywb_proxy_default_timestamp' in environ:
                wb_url.timestamp = environ['pywb_proxy_default_timestamp']
                wb_url.type = wb_url.REPLAY

        return is_timegate
Exemplo n.º 2
0
    def _check_accept_dt(self, wb_url, environ):
        is_timegate = False
        if wb_url.is_latest_replay():
            accept_dt = environ.get('HTTP_ACCEPT_DATETIME')
            is_timegate = True
            if accept_dt:
                try:
                    wb_url.timestamp = http_date_to_timestamp(accept_dt)
                except:
                    raise UpstreamException(400, url=wb_url.url, details='Invalid Accept-Datetime')
                    #return WbResponse.text_response('Invalid Accept-Datetime', status='400 Bad Request')

                wb_url.type = wb_url.REPLAY

        return is_timegate
Exemplo n.º 3
0
    def links_to_cdxobject(self, link_header, def_name):
        """Yield one CDXObject per memento listed in a parsed link header.

        The header is parsed with ``MementoUtils.parse_links``; the url of
        its ``original`` entry supplies both the canonicalized ``urlkey``
        and the ``url`` of every yielded record. Each record also carries
        the memento's timestamp, rel, url and a replay ``load_url``.

        :param link_header: The link header value handed to the parser
        :param def_name: Passed through unchanged to
            ``MementoUtils.parse_links``
        :return: A generator of CDXObject instances
        """
        parsed = MementoUtils.parse_links(link_header, def_name)

        original_url = parsed['original']['url']
        urlkey = canonicalize(original_url)

        for memento in parsed['mementos']:
            timestamp = http_date_to_timestamp(memento['datetime'])

            cdx = CDXObject()
            cdx['urlkey'] = urlkey
            cdx['timestamp'] = timestamp
            cdx['url'] = original_url
            cdx['mem_rel'] = memento.get('rel', '')
            cdx['memento_url'] = memento['url']
            # The timestamp is read back from the record, not the local.
            cdx['load_url'] = self._get_replay_url(cdx['timestamp'],
                                                   original_url)

            yield cdx
Exemplo n.º 4
0
    def make_memento_link(cls, url, type, dt, coll=None, memento_format=None):
        """Build a single memento link string.

        The link target is ``url`` itself, unless ``memento_format`` is
        given, in which case the target is that template formatted with
        ``url`` and the ``timestamp`` derived from ``dt``.

        :param str url: A URL
        :param str type: The rel type
        :param str dt: The datetime of the URL
        :param str|None coll: Optional name of a collection, appended as a
            ``collection`` attribute when truthy
        :param str|None memento_format: Optional string used to format the
            supplied URL
        :return: A memento link string
        :rtype: str
        """
        target = url
        if memento_format:
            target = memento_format.format(
                url=url, timestamp=http_date_to_timestamp(dt))

        pieces = ['<{0}>; rel="{1}"; datetime="{2}"'.format(target, type, dt)]

        if coll:
            pieces.append('; collection="{0}"'.format(coll))

        return ''.join(pieces)
Exemplo n.º 5
0
    def process_record(self, record, flow):
        """Rewrite the content of ``record`` for the given proxy ``flow``.

        Without a configured ``content_rewriter`` the record is passed
        through unchanged. Otherwise a CDX entry is assembled from the
        response headers, the content is rewritten, the
        ``Content-Security-Policy`` header is removed, and the response is
        set up for either a known content-length, chunked transfer
        (HTTP/1.1) or buffering (any other HTTP version).

        :param record: The record providing ``http_headers`` and
            ``raw_stream``
        :param flow: The proxy flow providing the request, the response
            headers and ``extra_data``
        :return: A ``(headers, body)`` tuple; the body is a ``StreamIO``
            on the pass-through path and an ``IterIdent`` otherwise
        :rtype: tuple
        """
        headers = flow.response.headers
        url = flow.request.req_url
        scheme = flow.request.req_scheme

        if not self.content_rewriter:
            return record.http_headers, StreamIO(record.raw_stream)

        cookie_rewriter = None

        template_params = flow.extra_data

        environ = {
            'pywb_proxy_magic': self.proxy_magic,
            'webrec.template_params': template_params
        }

        wb_url = WbUrl(url)
        wb_prefix = ''
        host_prefix = scheme + '://' + self.proxy_magic
        urlrewriter = SchemeOnlyUrlRewriter(wb_url, '')

        if flow.request.headers.get('X-Requested-With',
                                    '').lower() == 'xmlhttprequest':
            urlrewriter.rewrite_opts['is_ajax'] = True

        head_insert_func = (self.head_insert_view.create_insert_func(
            wb_url, wb_prefix, host_prefix, url, environ, False))

        urlkey = canonicalize(wb_url.url)

        cdx = CDXObject()
        cdx['urlkey'] = urlkey
        cdx['timestamp'] = http_date_to_timestamp(
            headers.get('Memento-Datetime'))
        cdx['url'] = wb_url.url
        if headers.get('Webagg-Source-Coll') == 'live':
            cdx['is_live'] = 'true'

        result = self.content_rewriter.rewrite_content(
            urlrewriter, record.http_headers, record.raw_stream,
            head_insert_func, urlkey, cdx, cookie_rewriter, environ)

        status_headers, gen, _is_rw = result

        status_headers.remove_header('Content-Security-Policy')

        # A positive, parseable content-length means the body can be sent
        # as-is; a missing (None) or non-numeric header falls through.
        res = status_headers.get_header('content-length')
        try:
            has_length = int(res) > 0
        except (TypeError, ValueError):
            has_length = False

        if has_length:
            return status_headers, IterIdent(gen)

        # need to either chunk or buffer to get content-length
        if flow.request.http_version == 'HTTP/1.1':
            status_headers.remove_header('content-length')
            status_headers.headers.append(('Transfer-Encoding', 'chunked'))
            # NOTE(review): only the header is set here; the body is not
            # chunk-encoded in this method (the chunk_encode_iter call was
            # disabled) -- presumably done downstream, confirm.
        else:
            gen = buffer_iter(status_headers, gen)

        return status_headers, IterIdent(gen)