Пример #1
0
 def initialize_options(self):
     """Run the pre-install steps, then initialise the base command's options.

     The webrecorder helpers are imported at call time rather than at module
     import time.
     """
     from webrecorder.standalone.assetsutils import default_build
     from webrecorder.load.wamloader import WAMLoader

     # Steps run in a fixed sequence: default build, web-archive merge,
     # then generation of the git-hash module for the 'webrecorder' package.
     default_build()
     WAMLoader.merge_webarchives()
     generate_git_hash_py('webrecorder')

     base_command = super(Install, self)
     base_command.initialize_options()
    def __init__(self, *args, **kwargs):
        """Initialise the controller from keyword arguments and the environment.

        ``kwargs`` must contain ``config``, ``browser_mgr`` and ``jinja_env``;
        ``RECORD_HOST`` and ``WARCSERVER_HOST`` must be set in the environment.
        A missing entry raises ``KeyError``.
        """
        BaseController.__init__(self, *args, **kwargs)

        settings = kwargs['config']

        # Assigned before get_csp_header() runs -- presumably read there.
        self.content_error_redirect = os.environ.get('CONTENT_ERROR_REDIRECT')

        # The header is stored in the config before RewriterApp receives it.
        settings['csp-header'] = self.get_csp_header()

        self.browser_mgr = kwargs['browser_mgr']

        RewriterApp.__init__(
            self,
            framed_replay=True,
            jinja_env=kwargs['jinja_env'],
            config=settings,
        )

        self.paths = settings['url_templates']

        # NOTE(review): self.redis is not assigned in this method; presumably
        # it is provided by BaseController.__init__ -- confirm.
        self.cookie_tracker = CookieTracker(self.redis)

        env = os.environ
        self.record_host = env['RECORD_HOST']
        self.live_host = env['WARCSERVER_HOST']
        # Use the live host when no (non-empty) proxy host is configured.
        self.replay_host = env.get('WARCSERVER_PROXY_HOST') or self.live_host

        self.wam_loader = WAMLoader()
        self._init_client_archive_info()

        self.dyn_stats = DynStats(self.redis, settings)
Пример #3
0
 def initialize_options(self):
     """Run the pre-install steps, then initialise the base command's options.

     The webrecorder helpers are imported at call time rather than at module
     import time.
     """
     from webrecorder.standalone.assetsutils import default_build
     from webrecorder.load.wamloader import WAMLoader

     # Steps run in a fixed sequence: default build, web-archive merge,
     # then generation of the git-hash module for the 'webrecorder' package.
     default_build()
     WAMLoader.merge_webarchives()
     generate_git_hash_py('webrecorder')

     base_command = super(Install, self)
     base_command.initialize_options()
Пример #4
0
    def __init__(self, *args, **kwargs):
        """Initialise the controller from keyword arguments and the environment.

        ``kwargs`` must contain ``config``, ``browser_mgr`` and ``jinja_env``;
        ``RECORD_HOST`` and ``WARCSERVER_HOST`` must be set in the environment.
        A missing entry raises ``KeyError``.
        """
        BaseController.__init__(self, *args, **kwargs)

        settings = kwargs['config']

        # Assigned before get_csp_header() runs -- presumably read there.
        self.content_error_redirect = os.environ.get('CONTENT_ERROR_REDIRECT')

        # The header is stored in the config before RewriterApp receives it.
        settings['csp-header'] = self.get_csp_header()

        self.browser_mgr = kwargs['browser_mgr']

        RewriterApp.__init__(
            self,
            framed_replay=True,
            jinja_env=kwargs['jinja_env'],
            config=settings,
        )

        self.paths = settings['url_templates']

        # NOTE(review): self.redis is not assigned in this method; presumably
        # it is provided by BaseController.__init__ -- confirm.
        self.cookie_tracker = CookieTracker(self.redis)

        env = os.environ
        self.record_host = env['RECORD_HOST']
        self.live_host = env['WARCSERVER_HOST']
        # Use the live host when no (non-empty) proxy host is configured.
        self.replay_host = env.get('WARCSERVER_PROXY_HOST') or self.live_host
        # Optional; None when the variable is not set.
        self.session_redirect_host = env.get('SESSION_REDIRECT_HOST')

        self.wam_loader = WAMLoader()
        self._init_client_archive_info()

        self.dyn_stats = DynStats(self.redis, settings)
Пример #5
0
    def __init__(self, *args, **kwargs):
        """Initialise the controller from keyword arguments and the environment.

        ``kwargs`` must contain ``config``, ``browser_mgr`` and ``jinja_env``;
        ``RECORD_HOST`` and ``WARCSERVER_HOST`` must be set in the environment.
        A missing entry raises ``KeyError``. ``SLEEP_ON_404`` must parse as an
        integer when set (``int()`` raises ``ValueError`` otherwise).
        """
        BaseController.__init__(self, *args, **kwargs)

        settings = kwargs['config']

        # Assigned before get_csp_header() runs -- presumably read there.
        self.content_error_redirect = os.environ.get('CONTENT_ERROR_REDIRECT')

        # The header is stored in the config before RewriterApp receives it.
        settings['csp-header'] = self.get_csp_header()

        self.browser_mgr = kwargs['browser_mgr']

        RewriterApp.__init__(
            self,
            framed_replay=True,
            jinja_env=kwargs['jinja_env'],
            config=settings,
        )

        self.paths = settings['url_templates']

        # NOTE(review): self.redis is not assigned in this method; presumably
        # it is provided by BaseController.__init__ -- confirm.
        self.cookie_tracker = CookieTracker(self.redis)

        env = os.environ
        self.record_host = env['RECORD_HOST']
        self.live_host = env['WARCSERVER_HOST']
        # Use the live host when no (non-empty) proxy host is configured.
        self.replay_host = env.get('WARCSERVER_PROXY_HOST') or self.live_host
        # Optional; None when the variable is not set.
        self.session_redirect_host = env.get('SESSION_REDIRECT_HOST')

        self.wam_loader = WAMLoader()
        self._init_client_archive_info()

        self.dyn_stats = DynStats(self.redis, settings)

        # BEGIN PERMA CUSTOMIZATION
        # Perma occasionally refuses to play back the content of certain
        # URLs or domains. This is a temporary workaround, until we devise
        # a more universally satisfactory solution.
        # REFUSE_PLAYBACK is a comma-separated list; empty entries are dropped.
        refused = env.get('REFUSE_PLAYBACK', '')
        self.refuse_playback = [entry for entry in refused.split(',') if entry]
        # We are experiencing unexpected, transient 404s that resolve on refresh.
        # This is a temporary workaround/diagnostic experiment.
        self.sleep_on_404 = int(env.get('SLEEP_ON_404', '2'))
Пример #6
0
    def __init__(self, app, jinja_env, config, redis):
        """Initialise both base classes and the host/cookie settings.

        ``config`` must provide ``url_templates`` and ``cookie_key_templ``;
        ``RECORD_HOST`` and ``WEBAGG_HOST`` must be set in the environment.
        A missing entry raises ``KeyError``.
        """
        BaseController.__init__(self, app, jinja_env, None, config)
        RewriterApp.__init__(
            self,
            framed_replay=True,
            jinja_env=jinja_env,
            config=config,
        )

        self.paths = config['url_templates']

        self.cookie_key_templ = config['cookie_key_templ']

        self.cookie_tracker = CookieTracker(redis)

        env = os.environ
        self.record_host = env['RECORD_HOST']
        self.live_host = env['WEBAGG_HOST']
        # Use the live host when no (non-empty) proxy host is configured.
        self.replay_host = env.get('WEBAGG_PROXY_HOST') or self.live_host

        self.wam_loader = WAMLoader()
        self._init_client_archive_info()
        self.init_csp_header()
Пример #7
0
    def __init__(self, *args, **kwargs):
        """Initialise the indexer and share one WAMLoader with CDXJIndexer.

        ``kwargs`` must contain ``config`` (``KeyError`` otherwise);
        ``info_keys`` and ``rec_info_key_templ`` are optional.
        """
        super(WebRecRedisIndexer, self).__init__(*args, **kwargs)

        self.info_keys = kwargs.get('info_keys', [])
        self.rec_info_key_templ = kwargs.get('rec_info_key_templ')

        # NOTE(review): the value is not used further in this method; the
        # lookup still raises KeyError when 'config' is absent.
        config = kwargs['config']

        self.coll_cdxj_key = Collection.COLL_CDXJ_KEY
        self.rec_file_key_template = Recording.REC_WARC_KEY

        # One loader instance, also installed on the CDXJIndexer class so its
        # index writers use the same object.
        shared_loader = WAMLoader()
        self.wam_loader = shared_loader
        CDXJIndexer.wam_loader = shared_loader

        self.stats = Stats(self.redis)
Пример #8
0
    def __init__(self, *args, **kwargs):
        """Initialise the indexer's key templates, rate limit and shared WAMLoader.

        ``kwargs`` must contain ``config`` with ``temp_prefix``,
        ``user_usage_key``, ``temp_usage_key`` and ``rate_limit_key``
        (``KeyError`` otherwise). ``RATE_LIMIT_HOURS`` is read from the
        environment and defaults to 0.
        """
        super(WebRecRedisIndexer, self).__init__(*args, **kwargs)

        self.size_keys = kwargs.get('size_keys', [])
        self.rec_info_key_templ = kwargs.get('rec_info_key_templ')

        settings = kwargs['config']

        self.temp_prefix = settings['temp_prefix']

        self.user_usage_key = settings['user_usage_key']
        self.temp_usage_key = settings['temp_usage_key']

        self.rate_limit_key = settings['rate_limit_key']

        hours = int(os.environ.get('RATE_LIMIT_HOURS', 0))
        self.rate_limit_hours = hours
        # Same window expressed in seconds.
        self.rate_limit_ttl = hours * 60 * 60

        # One loader instance, also installed on the CDXJIndexer class so its
        # index writers use the same object.
        shared_loader = WAMLoader()
        self.wam_loader = shared_loader
        CDXJIndexer.wam_loader = shared_loader
Пример #9
0
    def __init__(self, *args, **kwargs):
        """Initialise the controller from keyword arguments and the environment.

        ``kwargs`` must contain ``config``, ``browser_mgr`` and ``jinja_env``;
        ``RECORD_HOST`` and ``WARCSERVER_HOST`` must be set in the environment.
        A missing entry raises ``KeyError``.
        """
        BaseController.__init__(self, *args, **kwargs)

        settings = kwargs['config']

        # Assigned before get_csp_header() runs -- presumably read there.
        self.content_error_redirect = os.environ.get('CONTENT_ERROR_REDIRECT')

        # The header is stored in the config before RewriterApp receives it.
        settings['csp-header'] = self.get_csp_header()

        self.browser_mgr = kwargs['browser_mgr']

        RewriterApp.__init__(
            self,
            framed_replay=True,
            jinja_env=kwargs['jinja_env'],
            config=settings,
        )

        self.paths = settings['url_templates']

        # NOTE(review): self.redis is not assigned in this method; presumably
        # it is provided by BaseController.__init__ -- confirm.
        self.cookie_tracker = CookieTracker(self.redis)

        env = os.environ
        self.record_host = env['RECORD_HOST']
        self.live_host = env['WARCSERVER_HOST']
        # Use the live host when no (non-empty) proxy host is configured.
        self.replay_host = env.get('WARCSERVER_PROXY_HOST') or self.live_host
        # Optional; None when the variable is not set.
        self.session_redirect_host = env.get('SESSION_REDIRECT_HOST')

        self.wam_loader = WAMLoader()
        self._init_client_archive_info()

        self.dyn_stats = DynStats(self.redis, settings)

        # BEGIN PERMA CUSTOMIZATION
        # Perma occasionally refuses to play back the content of certain
        # URLs or domains. This is a temporary workaround, until we devise
        # a more universally satisfactory solution.
        # REFUSE_PLAYBACK is a comma-separated list; empty entries are dropped.
        refused = env.get('REFUSE_PLAYBACK', '')
        self.refuse_playback = [entry for entry in refused.split(',') if entry]
Пример #10
0
class ContentController(BaseController, RewriterApp):
    DEF_REC_NAME = 'Recording Session'

    WB_URL_RX = re.compile('(([\d*]*)([a-z]+_|[$][a-z0-9:.-]+)?/)?([a-zA-Z]+:)?//.*')

    MODIFY_MODES = ('record', 'patch', 'extract')

    def __init__(self, app, jinja_env, config, redis):
        """Initialise both base classes and the host/cookie settings.

        ``config`` must provide ``url_templates`` and ``cookie_key_templ``;
        ``RECORD_HOST`` and ``WARCSERVER_HOST`` must be set in the
        environment. A missing entry raises ``KeyError``.
        """
        BaseController.__init__(self, app, jinja_env, None, config)

        # The header is stored in the config before RewriterApp receives it.
        config['csp-header'] = self.get_csp_header()

        RewriterApp.__init__(
            self,
            framed_replay=True,
            jinja_env=jinja_env,
            config=config,
        )

        self.paths = config['url_templates']

        self.cookie_key_templ = config['cookie_key_templ']

        self.cookie_tracker = CookieTracker(redis)

        env = os.environ
        self.record_host = env['RECORD_HOST']
        self.live_host = env['WARCSERVER_HOST']
        # Use the live host when no (non-empty) proxy host is configured.
        self.replay_host = env.get('WARCSERVER_PROXY_HOST') or self.live_host

        self.wam_loader = WAMLoader()
        self._init_client_archive_info()

    def _init_client_archive_info(self):
        self.client_archives = {}
        for pk, archive in self.wam_loader.replay_info.items():
            info = {'name': archive['name'],
                    'about': archive['about'],
                    'prefix': archive['replay_prefix'],
                   }
            if archive.get('parse_collection'):
                info['parse_collection'] = True

            self.client_archives[pk] = info

    def get_csp_header(self):
        """Return the Content-Security-Policy value for this controller.

        The policy always starts with the fixed ``default-src`` source list
        and ends with ``form-action 'self'``. When an app host is configured
        and differs from the content host, ``<app_host>/_set_session`` is
        appended to ``default-src``.

        An unset or empty ``app_host`` adds nothing: concatenating ``None``
        would raise ``TypeError`` and an empty string would add a bare
        ``/_set_session`` source.
        """
        csp = "default-src 'unsafe-eval' 'unsafe-inline' 'self' data: blob: mediastream: ws: wss: "
        if self.app_host and self.content_host != self.app_host:
            csp += self.app_host + '/_set_session'

        csp += "; form-action 'self'"
        return csp

    def init_routes(self):
        """Register this controller's routes on ``self.app``.

        Routes are registered in the order written below; keep that order
        when editing, since several patterns overlap (e.g. the generic
        ``/<user>/<coll>/<wb_url:path>`` replay route comes last among the
        content routes).

        Route patterns containing backslashes are raw strings so that ``\\w``
        and ``\\:`` reach the router unchanged without relying on Python's
        handling of unrecognised escape sequences.
        """
        # REDIRECTS
        @self.app.route('/record/<wb_url:path>', method='ANY')
        def redir_new_temp_rec(wb_url):
            coll = 'temp'
            rec = self.DEF_REC_NAME
            wb_url = self.add_query(wb_url)
            return self.do_create_new_and_redir(coll, rec, wb_url, 'record')

        @self.app.route('/$record/<coll>/<rec>/<wb_url:path>', method='ANY')
        def redir_new_record(coll, rec, wb_url):
            wb_url = self.add_query(wb_url)
            return self.do_create_new_and_redir(coll, rec, wb_url, 'record')

        # TAGS
        @self.app.get(['/_tags/', r'/_tags/<tags:re:([\w,-]+)>'])
        @self.jinja2_view('paging_display.html')
        def tag_display(tags=None):
            if not self.manager.is_beta():
                raise HTTPError(404)

            # No tags in the URL means "all available tags".
            tags = tags.split(',') if tags else self.manager.get_available_tags()
            items = {}
            keys = []

            active_tags = self.manager.get_available_tags()

            # Requested tags that are not currently available are ignored.
            for tag in tags:
                if tag in active_tags:
                    keys.append(tag)
                    items[tag] = self.manager.get_pages_for_tag(tag)

            return {'data': items, 'keys': keys}

        # COLLECTIONS
        @self.app.get(['/_display/<user>', r'/_display/<user>/<collections:re:([\w,-]+)>'])
        @self.jinja2_view('paging_display.html')
        def collection_display(user, collections=None):
            if not self.manager.is_beta():
                raise HTTPError(404)

            user_collections = [c['id'] for c in self.manager.get_collections(user)]
            # No collections in the URL means "all of the user's collections".
            colls = collections.split(',') if collections else user_collections
            items = {}
            keys = []

            # Requested ids that are not among the user's collections are ignored.
            for coll in colls:
                if coll in user_collections:
                    keys.append(coll)
                    items[coll] = self.manager.list_coll_pages(user, coll)

            return {'data': items, 'keys': keys}

        # COOKIES
        # NOTE(review): registered through app.get() with method='POST' --
        # left as-is; confirm this is the intended verb.
        @self.app.get(['/<user>/<coll>/$add_cookie'], method='POST')
        def add_cookie(user, coll):
            if not self.manager.has_collection(user, coll):
                self._raise_error(404, 'Collection not found',
                                  api=True, id=coll)

            rec = request.query.getunicode('rec', '*')

            name = request.forms.getunicode('name')
            value = request.forms.getunicode('value')
            domain = request.forms.getunicode('domain')

            if not domain:
                return {'error_message': 'no domain'}

            self.add_cookie(user, coll, rec, name, value, domain)

            return {'success': domain}

        # PROXY
        @self.app.route('/_proxy/<url:path>', method='ANY')
        def do_proxy(url):
            return self.do_proxy(url)

        # LIVE DEBUG
        # The route decorator is commented out, so this handler is defined
        # but never registered.
        #@self.app.route('/live/<wb_url:path>', method='ANY')
        def live(wb_url):
            request.path_shift(1)

            return self.handle_routing(wb_url, user='******', coll='temp', rec='', type='live')

        # EMBED
        @self.app.route('/_embed/<user>/<coll>/<wb_url:path>', method='ANY')
        def embed_replay(user, coll, wb_url):
            request.path_shift(3)
            #return self.do_replay_coll_or_rec(user, coll, wb_url, is_embed=True)
            return self.handle_routing(wb_url, user, coll, '*', type='replay-coll',
                                       is_embed=True)


        # DISPLAY
        # Distinct function name so it no longer rebinds the embed handler above.
        @self.app.route('/_embed_noborder/<user>/<coll>/<wb_url:path>', method='ANY')
        def embed_replay_noborder(user, coll, wb_url):
            request.path_shift(3)
            #return self.do_replay_coll_or_rec(user, coll, wb_url, is_embed=True,
            #                                  is_display=True)
            return self.handle_routing(wb_url, user, coll, '*', type='replay-coll',
                                       is_embed=True, is_display=True)


        # CONTENT ROUTES
        # Record
        @self.app.route('/<user>/<coll>/<rec:path>/record/<wb_url:path>', method='ANY')
        def do_record(user, coll, rec, wb_url):
            request.path_shift(4)

            return self.handle_routing(wb_url, user, coll, rec, type='record', redir_route='record')

        # Patch
        @self.app.route('/<user>/<coll>/<rec>/patch/<wb_url:path>', method='ANY')
        def do_patch(user, coll, rec, wb_url):
            request.path_shift(4)

            return self.handle_routing(wb_url, user, coll, rec, type='patch', redir_route='patch')

        # Extract
        @self.app.route(r'/<user>/<coll>/<rec:path>/extract\:<archive>/<wb_url:path>', method='ANY')
        def do_extract_patch_archive(user, coll, rec, wb_url, archive):
            request.path_shift(4)

            return self.handle_routing(wb_url, user, coll, rec, type='extract',
                                       sources=archive,
                                       inv_sources=archive,
                                       redir_route='extract:' + archive)

        @self.app.route(r'/<user>/<coll>/<rec:path>/extract_only\:<archive>/<wb_url:path>', method='ANY')
        def do_extract_only_archive(user, coll, rec, wb_url, archive):
            request.path_shift(4)

            return self.handle_routing(wb_url, user, coll, rec, type='extract',
                                       sources=archive,
                                       inv_sources='*',
                                       redir_route='extract_only:' + archive)

        @self.app.route('/<user>/<coll>/<rec:path>/extract/<wb_url:path>', method='ANY')
        def do_extract_all(user, coll, rec, wb_url):
            request.path_shift(4)

            return self.handle_routing(wb_url, user, coll, rec, type='extract',
                                       sources='*',
                                       inv_sources='*',
                                       redir_route='extract')

        # Replay
        @self.app.route('/<user>/<coll>/<rec>/replay/<wb_url:path>', method='ANY')
        def do_replay_rec(user, coll, rec, wb_url):
            request.path_shift(4)

            return self.handle_routing(wb_url, user, coll, rec, type='replay')

        # Replay Coll
        @self.app.route('/<user>/<coll>/<wb_url:path>', method='ANY')
        def do_replay_coll(user, coll, wb_url):
            request.path_shift(2)

            return self.handle_routing(wb_url, user, coll, '*', type='replay-coll')

        # Session redir
        @self.app.route(['/_set_session'])
        def set_sesh():
            sesh = self.get_session()

            if self.is_content_request():
                # On the content host: adopt the session id from the query
                # string, then continue to the requested path.
                sesh_id = request.query.getunicode('id')
                sesh.set_id(sesh_id)
                return self.redirect(request.query.getunicode('path'))

            else:
                # On the app host: bounce to the content host's /_set_session,
                # forwarding the original query plus the current session id.
                url = request.environ['wsgi.url_scheme'] + '://' + self.content_host
                response.headers['Access-Control-Allow-Origin'] = url
                response.headers['Cache-Control'] = 'no-cache'

                redirect(url + '/_set_session?' + request.environ['QUERY_STRING'] + '&id=' + quote(sesh.get_id()))

        @self.app.route(['/_clear_session'])
        def clear_sesh():
            sesh = self.get_session()
            sesh.delete()
            return self.redir_host(None, request.query.getunicode('path', '/'))

    def do_proxy(self, url):
        """Render ``url`` for a connection coming from a containerized browser.

        Session parameters come from
        ``self.manager.browser_mgr.init_cont_browser_sesh()``; a falsy result
        yields an ``error_message`` dict. Any exception raised while
        preparing or rendering the content is turned into the
        ``content_error.html`` view.
        """
        info = self.manager.browser_mgr.init_cont_browser_sesh()
        if not info:
            return {'error_message': 'conn not from valid containerized browser'}

        try:
            # Same dict object as ``info``; it is extended in place below.
            params = info
            params['coll_orig'] = params['coll']
            params['coll'] = quote(params['coll'])
            params['rec_orig'] = params['rec']
            params['rec'] = quote(params['rec'], '/*')

            if params['type'] == 'replay-coll':
                self.manager.sync_coll_index(params['user'], params['coll_orig'],
                                             exists=False, do_async=False)

            url = self.add_query(url)
            params['url'] = url

            wb_url = params.get('request_ts', '') + 'bn_/' + url

            request.environ['webrec.template_params'] = params

            remote_ip = info.get('remote_ip')
            modifying = remote_ip and info['type'] in self.MODIFY_MODES
            if modifying and self.manager.is_rate_limited(info['user'], remote_ip):
                raise HTTPError(402, 'Rate Limit')

            rendered = self.render_content(wb_url, params, request.environ)

            return HTTPResponse(body=rendered.body,
                                status=rendered.status_headers.statusline,
                                headers=rendered.status_headers.headers)

        except Exception as e:
            @self.jinja2_view('content_error.html')
            def handle_error(status_code, err_body, environ):
                response.status = status_code
                params['url'] = url
                params['status'] = status_code
                params['err_body'] = err_body
                params['host_prefix'] = self.get_host_prefix(environ)
                params['proxy_magic'] = environ.get('wsgiprox.proxy_host', '')
                return params

            # Use the exception's own status when it has one, else 500.
            status_code = getattr(e, 'status_code', 500)

            # Prefer ``body`` over ``msg``; fall back to an empty string.
            err_body = ''
            for attr in ('body', 'msg'):
                if hasattr(e, attr):
                    err_body = getattr(e, attr)
                    break

            return handle_error(status_code, err_body, request.environ)

    def check_remote_archive(self, wb_url, mode, wb_url_obj=None):
        """Map a URL onto a known remote archive, if ``self.wam_loader`` finds one.

        Returns ``None`` when no archive matches. Otherwise returns a
        ``(mode, url)`` tuple where ``mode`` is ``'extract:<archive id>'`` and
        ``url`` is the archive-relative URL re-serialised with the modifier of
        the incoming URL. The ``mode`` argument itself is not consulted.
        """
        parsed = wb_url_obj if wb_url_obj else WbUrl(wb_url)

        match = self.wam_loader.find_archive_for_url(parsed.url)
        if not match:
            return

        _pk, archive_url, archive_id = match

        rewritten = WbUrl(archive_url).to_str(mod=parsed.mod)
        return 'extract:' + archive_id, rewritten

    def do_create_new_and_redir(self, coll, rec, wb_url, mode):
        """Create a new recording (and collection if needed), then redirect to it.

        In ``'record'`` mode the URL is first checked against known remote
        archives; a match switches ``mode`` and ``wb_url`` to the extract
        form. Without a current user an anonymous user and the ``temp``
        collection are used. For ``extract:`` modes a companion patch
        recording is created as well.
        """
        if mode == 'record':
            remote = self.check_remote_archive(wb_url, mode)
            if remote:
                mode, wb_url = remote

        title = rec

        user = self.manager.get_curr_user()
        if user:
            coll_title = coll
            coll = self.sanitize_title(coll_title)
        else:
            user = self.manager.get_anon_user(True)
            coll = 'temp'
            coll_title = 'Temporary Collection'

        if not self.manager.has_collection(user, coll):
            self.manager.create_collection(user, coll, coll_title)

        rec = self._create_new_rec(user, coll, title, mode)

        if mode.startswith('extract:'):
            # Only the creation matters here; the returned id is not used.
            self._create_new_rec(user, coll, self.patch_of_name(title), 'patch')

        target = '/{user}/{coll}/{rec}/{mode}/{url}'.format(
            user=user, coll=coll, rec=rec, mode=mode, url=wb_url)
        return self.redirect(target)

    def is_content_request(self):
        """Return True when the request's ``HTTP_HOST`` equals the content host.

        Always False when no content host is configured.
        """
        host = self.content_host
        return bool(host) and request.environ.get('HTTP_HOST') == host

    def redir_set_session(self):
        """Send the current path (with query string) through ``/_set_session``.

        The full path is ``SCRIPT_NAME + PATH_INFO`` plus any query string,
        URL-quoted into the ``path`` parameter.
        """
        env = request.environ
        full_path = self.add_query(env['SCRIPT_NAME'] + env['PATH_INFO'])
        self.redir_host(None, '/_set_session?path=' + quote(full_path))

    def _create_new_rec(self, user, coll, title, mode, no_dupe=False):
        """Create a recording titled ``title`` and return the id it was given.

        The requested id is the sanitized title. ``rec_type`` is ``'patch'``
        only when ``mode`` is exactly ``'patch'``; otherwise ``None``.
        """
        requested_id = self.sanitize_title(title)
        rec_type = None
        if mode == 'patch':
            rec_type = 'patch'

        created = self.manager.create_recording(user, coll, requested_id, title,
                                                rec_type=rec_type,
                                                no_dupe=no_dupe)
        return created['id']

    def patch_of_name(self, name, is_id=False):
        """Return the patch-recording title for ``name``, or its id form.

        ``'Patch of <name>'`` by default; ``'patch-of-<name>'`` when ``is_id``
        is truthy.
        """
        prefix = 'patch-of-' if is_id else 'Patch of '
        return prefix + name

    def handle_routing(self, wb_url, user, coll, rec, type,
                       is_embed=False,
                       is_display=False,
                       sources='',
                       inv_sources='',
                       redir_route=None):
        """Validate a content request and render it via ``render_content``.

        Args:
            wb_url: URL portion of the route; the request's query string is
                appended before use.
            user, coll, rec: route components identifying the target.
            type: request mode, e.g. ``'record'``, ``'patch'``, ``'extract'``,
                ``'replay'``, ``'replay-coll'`` or ``'live'``.
            is_embed, is_display: passed through to the render parameters.
            sources, inv_sources: passed through to the render parameters;
                ``inv_sources`` other than ``''``/``'*'`` also selects a
                patch recording id in modify modes.
            redir_route: when set and ``user == '_new'``, a new recording is
                created and the request is redirected instead of rendered.

        Returns:
            An ``HTTPResponse`` built from the rendered content, a redirect
            result, or the ``content_error.html`` view on ``UpstreamException``.

        Raises:
            HTTPError: 404 for a missing recording/collection or a recording
                that is not open; 402 when out of space or rate limited.
        """

        wb_url = self.add_query(wb_url)
        if user == '_new' and redir_route:
            return self.do_create_new_and_redir(coll, rec, wb_url, redir_route)

        sesh = self.get_session()

        # NOTE(review): execution continues after this call unless
        # redir_set_session() raises -- presumably it does; confirm.
        if sesh.is_new() and self.is_content_request():
            self.redir_set_session()

        remote_ip = None
        frontend_cache_header = None
        patch_rec = ''

        # Modes that write to a recording need an existing, open, writable
        # recording and must pass the space and rate-limit checks.
        if type in self.MODIFY_MODES:
            if not self.manager.has_recording(user, coll, rec):
                self._redir_if_sanitized(self.sanitize_title(rec),
                                         rec,
                                         wb_url)

                # don't auto create recording for inner frame w/o accessing outer frame
                raise HTTPError(404, 'No Such Recording')

            elif not self.manager.is_recording_open(user, coll, rec):
                # force creation of new recording as this one is closed
                raise HTTPError(404, 'Recording not open')

            self.manager.assert_can_write(user, coll)

            if self.manager.is_out_of_space(user):
                raise HTTPError(402, 'Out of Space')

            remote_ip = self._get_remote_ip()

            if self.manager.is_rate_limited(user, remote_ip):
                raise HTTPError(402, 'Rate Limit')

            # A specific inverse source (not empty, not '*') selects the
            # id-form patch recording name for this recording.
            if inv_sources and inv_sources != '*':
                patch_rec = self.patch_of_name(rec, True)

        if type == 'replay-coll':
            res = self.manager.has_collection_is_public(user, coll)
            if not res:
                self._redir_if_sanitized(self.sanitize_title(coll),
                                         coll,
                                         wb_url)

                raise HTTPError(404, 'No Such Collection')

            # Any result other than 'public' gets a private Cache-Control header.
            if res != 'public':
                frontend_cache_header = ('Cache-Control', 'private')

        elif type == 'replay':
            if not self.manager.has_recording(user, coll, rec):
                raise HTTPError(404, 'No Such Recording')

        request.environ['SCRIPT_NAME'] = quote(request.environ['SCRIPT_NAME'], safe='/:')

        wb_url = self._context_massage(wb_url)

        wb_url_obj = WbUrl(wb_url)
        # Top frame: the frame modifier, or a '$br:' (browser) modifier.
        is_top_frame = (wb_url_obj.mod == self.frame_mod or wb_url_obj.mod.startswith('$br:'))

        if type == 'record' and is_top_frame:
            # A URL that belongs to a known remote archive is redirected to
            # the matching extract route instead of being recorded directly.
            result = self.check_remote_archive(wb_url, type, wb_url_obj)
            if result:
                mode, wb_url = result
                new_url = '/{user}/{coll}/{rec}/{mode}/{url}'.format(user=user,
                                                                     coll=coll,
                                                                     rec=rec,
                                                                     mode=mode,
                                                                     url=wb_url)
                return self.redirect(new_url)

        elif type == 'replay-coll' and not is_top_frame:
            self.manager.sync_coll_index(user, coll, exists=False,
                                         do_async=False)

        # Parameters handed to render_content; coll/rec are URL-quoted, the
        # unquoted originals are kept under *_orig.
        kwargs = dict(user=user,
                      coll_orig=coll,
                      id=sesh.get_id(),
                      rec_orig=rec,
                      coll=quote(coll),
                      rec=quote(rec, safe='/*'),
                      type=type,
                      sources=sources,
                      inv_sources=inv_sources,
                      patch_rec=patch_rec,
                      ip=remote_ip,
                      is_embed=is_embed,
                      is_display=is_display,
                      use_js_obj_proxy=True)

        try:
            self.check_if_content(wb_url_obj, request.environ, is_top_frame)

            resp = self.render_content(wb_url, kwargs, request.environ)

            if frontend_cache_header:
                resp.status_headers.headers.append(frontend_cache_header)

            resp = HTTPResponse(body=resp.body,
                                status=resp.status_headers.statusline,
                                headers=resp.status_headers.headers)

            return resp

        except UpstreamException as ue:
            # Render the error page with the upstream status and details.
            @self.jinja2_view('content_error.html')
            def handle_error(status_code, type, url, err_info):
                response.status = status_code
                return {'url': url,
                        'status': status_code,
                        'error': err_info.get('error'),
                        'user': self.get_view_user(user),
                        'coll': coll,
                        'rec': rec,
                        'type': type,
                        'app_host': self.app_host,
                       }

            return handle_error(ue.status_code, type, ue.url, ue.msg)

    def check_if_content(self, wb_url, environ, is_top_frame):
        """Redirect replay requests that arrived on the wrong host.

        Does nothing unless ``wb_url.is_replay()`` is true and a content host
        is configured. Top-frame requests seen on the content host are sent to
        the app host; other requests seen off the content host are sent to the
        content host. ``environ`` is accepted but not read here.
        """
        if not wb_url.is_replay() or not self.content_host:
            return

        on_content_host = self.is_content_request()

        if is_top_frame and on_content_host:
            self.redir_host(self.app_host)
        elif not is_top_frame and not on_content_host:
            self.redir_host(self.content_host)

    def _filter_headers(self, type, status_headers):
        if type in ('replay', 'replay-coll'):
            new_headers = []
            for name, value in status_headers.headers:
                if name.lower() != 'set-cookie':
                    new_headers.append((name, value))

            status_headers.headers = new_headers

    def _inject_nocache_headers(self, status_headers, kwargs):
        if 'browser_id' in kwargs:
            status_headers.headers.append(
                ('Cache-Control', 'no-cache, no-store, max-age=0, must-revalidate')
            )

    def _redir_if_sanitized(self, id, title, wb_url):
        if id != title:
            target = request.script_name.replace(title, id)
            target += wb_url
            self.redirect(target)

    def _context_massage(self, wb_url):
        """Adjust the WSGI environ and ``wb_url`` before rendering.

        - Restores ``HTTP_COOKIE`` from ``webrec.request_cookie`` when present.
        - Removes ``HTTP_X_PUSH_STATE_REQUEST`` from the environ if it is set.
        - Strips ``&spf=navigate`` from YouTube ``mp_/`` URLs that end with it.

        Returns the (possibly modified) ``wb_url``.
        """
        environ = request.environ

        # reset HTTP_COOKIE to guarded request_cookie for LiveRewriter
        if 'webrec.request_cookie' in environ:
            environ['HTTP_COOKIE'] = environ['webrec.request_cookie']

        # pop() with a default is a no-op for a missing key, so no blanket
        # exception handler is needed to make the removal best-effort.
        environ.pop('HTTP_X_PUSH_STATE_REQUEST', None)

        #TODO: generalize
        if wb_url.endswith('&spf=navigate') and wb_url.startswith('mp_/https://www.youtube.com'):
            wb_url = wb_url.replace('&spf=navigate', '')

        return wb_url

    def add_query(self, url):
        """Return ``url`` with the current request's query string appended, if any."""
        query = request.query_string
        return url + '?' + query if query else url

    def get_cookie_key(self, kwargs):
        """Format ``self.cookie_key_templ`` with ``kwargs`` plus the session id.

        ``kwargs`` is modified in place: ``id`` is set to the current session
        id and a ``rec`` of ``'*'`` is rewritten to ``'<all>'``.
        """
        kwargs['id'] = self.get_session().get_id()

        if kwargs.get('rec') == '*':
            kwargs['rec'] = '<all>'

        return self.cookie_key_templ.format(**kwargs)

    def add_cookie(self, user, coll, rec, name, value, domain):
        """Store a cookie in ``self.cookie_tracker`` under the key for user/coll/rec."""
        cookie_key = self.get_cookie_key({'user': user, 'coll': coll, 'rec': rec})
        self.cookie_tracker.add_cookie(cookie_key, domain, name, value)

    def _get_remote_ip(self):
        """Return ``HTTP_X_REAL_IP`` if set and non-empty, else ``REMOTE_ADDR`` (default ``''``)."""
        env = request.environ
        return env.get('HTTP_X_REAL_IP') or env.get('REMOTE_ADDR', '')

    ## RewriterApp overrides
    def get_base_url(self, wb_url, kwargs):
        """Return the upstream URL for this request.

        A truthy ``kwargs['upstream_url']`` (proxy mode) is used as the
        template and formatted with ``kwargs``. Otherwise the template is
        ``self.paths[kwargs['type']]``, formatted with the record/replay/live
        hosts plus ``kwargs``. ``wb_url`` is not read here.
        """
        upstream = kwargs.get('upstream_url')
        if upstream:
            return upstream.format(**kwargs)

        template = self.paths[kwargs['type']]
        return template.format(record_host=self.record_host,
                               replay_host=self.replay_host,
                               live_host=self.live_host,
                               **kwargs)

    def process_query_cdx(self, cdx, wb_url, kwargs):
        """Set ``cdx['rec']`` from ``kwargs['rec']`` or from the cdx source.

        When ``rec`` is missing, falsy or ``'*'``, the value is the
        second-to-last ``':'``-separated field of ``cdx['source']`` (splitting
        at most twice from the right). ``wb_url`` is not read here.
        """
        requested = kwargs.get('rec')
        wants_all = (not requested) or requested == '*'

        cdx['rec'] = cdx['source'].rsplit(':', 2)[-2] if wants_all else requested

    def get_query_params(self, wb_url, kwargs):
        """Add collection details to ``kwargs`` and return the same dict.

        Sets ``rec_titles`` (recording id -> title), replaces ``user`` with
        its view form and sets ``coll_title`` (``''`` when the collection has
        no title). ``wb_url`` is not read here.
        """
        coll_info = self.manager.get_collection(kwargs['user'], kwargs['coll_orig'])

        kwargs['rec_titles'] = {
            recording['id']: recording['title']
            for recording in coll_info['recordings']
        }
        kwargs['user'] = self.get_view_user(kwargs['user'])
        kwargs['coll_title'] = coll_info.get('title', '')
        return kwargs

    def get_host_prefix(self, environ):
        """Return ``scheme://content_host`` for non-proxy requests when a content host is set.

        Requests carrying ``wsgiprox.proxy_host``, or controllers without a
        content host, use the parent class's host prefix.
        """
        use_content_host = self.content_host and 'wsgiprox.proxy_host' not in environ
        if not use_content_host:
            return super(ContentController, self).get_host_prefix(environ)

        return environ['wsgi.url_scheme'] + '://' + self.content_host

    def get_top_url(self, full_prefix, wb_url, cdx, kwargs):
        """Delegate to the parent ``get_top_url``, pointing non-frame URLs at the app host.

        When the URL's modifier is not the frame modifier and the content and
        app hosts differ, occurrences of the content host in ``full_prefix``
        are replaced by the app host first.
        """
        is_frame = wb_url.mod == self.frame_mod
        if not is_frame and self.content_host != self.app_host:
            full_prefix = full_prefix.replace(self.content_host, self.app_host)

        parent = super(ContentController, self)
        return parent.get_top_url(full_prefix, wb_url, cdx, kwargs)

    def get_top_frame_params(self, wb_url, kwargs):
        """Return the template parameters for the top frame.

        ``top_prefix`` is the parent class's host prefix plus the relative
        prefix of the current request. In ``'live'`` mode only the mode,
        embed/display flags and prefix are returned; otherwise the content
        inject info for the user/collection/recording is included as well.
        """
        mode = kwargs['type']

        parent = super(ContentController, self)
        top_prefix = parent.get_host_prefix(request.environ)
        top_prefix += self.get_rel_prefix(request.environ)

        if mode == 'live':
            return dict(curr_mode=mode,
                        is_embed=kwargs.get('is_embed'),
                        is_display=kwargs.get('is_display'),
                        top_prefix=top_prefix)

        # refresh cookie expiration,
        # disable until can guarantee cookie is not changed!
        #self.get_session().update_expires()

        info = self.manager.get_content_inject_info(kwargs['user'],
                                                    kwargs['coll_orig'],
                                                    kwargs['rec_orig'])

        return dict(info=info,
                    curr_mode=mode,
                    user=self.get_view_user(kwargs['user']),
                    coll=kwargs['coll'],
                    coll_orig=kwargs['coll_orig'],
                    rec=kwargs['rec'],
                    rec_orig=kwargs['rec_orig'],
                    coll_title=info.get('coll_title', ''),
                    rec_title=info.get('rec_title', ''),
                    is_embed=kwargs.get('is_embed'),
                    is_display=kwargs.get('is_display'),
                    top_prefix=top_prefix,
                    sources=kwargs.get('sources'),
                    inv_sources=kwargs.get('inv_sources'))

    def _add_custom_params(self, cdx, resp_headers, kwargs):
        try:
            self._add_stats(cdx, resp_headers, kwargs)
        except:
            import traceback
            traceback.print_exc()

    def _add_stats(self, cdx, resp_headers, kwargs):
        """Report one served capture to ``self.manager.update_dyn_stats``.

        Nothing is reported for ``'record'``/``'live'`` requests, for captures
        without a source, or for replay-sourced captures served in ``'patch'``
        mode. ``'local'`` sources count as ``'replay'``, and
        ``orig_source_id`` overrides the source when present.
        """
        mode = kwargs['type']
        if mode in ('record', 'live'):
            return

        source = cdx.get('source')
        if not source:
            return

        if source == 'local':
            source = 'replay'

        if mode == 'patch' and source == 'replay':
            return

        source = cdx.get('orig_source_id') or source

        # set source in recording-key
        ra_rec = None
        if mode in self.MODIFY_MODES:
            skipped = resp_headers.get('Recorder-Skip')
            if not skipped and source not in ('live', 'replay'):
                # Recording named by the response header, else the request's own.
                ra_rec = unquote(resp_headers.get('Recorder-Rec', '')) or kwargs['rec_orig']

        url = cdx.get('url')
        env = request.environ
        referrer = env.get('HTTP_REFERER')

        # Use the capture URL when there is no referrer, or when a non-proxy
        # request's referrer contains this request's own host.
        if not referrer or ('wsgiprox.proxy_host' not in env and
                            env.get('HTTP_HOST') in referrer):
            referrer = url

        self.manager.update_dyn_stats(url, kwargs, referrer, source, ra_rec)

    def handle_custom_response(self, environ, wb_url, full_prefix, host_prefix, kwargs):
        """Serve the browser-embed page for ``$br:`` modifiers, else defer to RewriterApp."""
        # test if request specifies a containerized browser
        wants_browser = wb_url.mod.startswith('$br:')
        if not wants_browser:
            return RewriterApp.handle_custom_response(self, environ, wb_url,
                                                      full_prefix, host_prefix, kwargs)

        return self.handle_browser_embed(wb_url, kwargs)

    def handle_browser_embed(self, wb_url, kwargs):
        """Request a new containerized browser and render ``browser_embed.html``.

        The browser id is the part of ``wb_url.mod`` after the first ``':'``.
        ``kwargs`` gains ``browser_can_write`` (``'1'``/``'0'``) and
        ``remote_ip`` before the browser is requested. An ``error_message`` in
        the browser manager's reply is raised through ``_raise_error(400, ...)``.
        """
        #handle cbrowsers
        _, browser_id = wb_url.mod.split(':', 1)

        can_write = self.manager.can_write_coll(kwargs['user'], kwargs['coll'])
        kwargs['browser_can_write'] = '1' if can_write else '0'

        kwargs['remote_ip'] = self._get_remote_ip()

        # container redis info
        inject_data = self.manager.browser_mgr.request_new_browser(browser_id, wb_url, kwargs)
        if 'error_message' in inject_data:
            self._raise_error(400, inject_data['error_message'])

        top_frame_params = self.get_top_frame_params(wb_url, kwargs)
        inject_data.update(top_frame_params)

        # The view only needs to hand the data dict to the template.
        @self.jinja2_view('browser_embed.html')
        def browser_embed(data):
            return data

        return browser_embed(inject_data)
Пример #11
0
class ContentController(BaseController, RewriterApp):
    """Controller that registers the content routes (record, patch, extract,
    replay, proxy, remote browsers) and renders content through RewriterApp.
    """

    # Title given to recordings created through the bare '/record/<url>' route.
    DEF_REC_NAME = 'Recording Session'

    # Matches an optional '<timestamp digits or *><modifier>/' prefix followed
    # by an optional scheme and '//'.  Raw string so that '\d' is passed to
    # the regex engine instead of being an invalid string escape.
    WB_URL_RX = re.compile(r'(([\d*]*)([a-z]+_|[$][a-z0-9:.-]+)?/)?([a-zA-Z]+:)?//.*')

    # Modes that write to a recording, as opposed to the replay modes.
    MODIFY_MODES = ('record', 'patch', 'extract')

    def __init__(self, *args, **kwargs):
        """Initialise both base classes and the controller's own state.

        Reads ``config``, ``browser_mgr`` and ``jinja_env`` from ``kwargs``
        and the ``CONTENT_ERROR_REDIRECT``, ``RECORD_HOST``,
        ``WARCSERVER_HOST`` and ``WARCSERVER_PROXY_HOST`` environment
        variables.  The computed CSP header is stored in
        ``config['csp-header']`` before ``RewriterApp.__init__`` runs.
        """
        BaseController.__init__(self, *args, **kwargs)

        app_config = kwargs['config']

        # get_csp_header() reads this attribute, so it has to be set first
        self.content_error_redirect = os.environ.get('CONTENT_ERROR_REDIRECT')
        app_config['csp-header'] = self.get_csp_header()

        self.browser_mgr = kwargs['browser_mgr']

        RewriterApp.__init__(self,
                             framed_replay=True,
                             jinja_env=kwargs['jinja_env'],
                             config=app_config)

        self.paths = app_config['url_templates']
        self.cookie_tracker = CookieTracker(self.redis)

        env = os.environ
        self.record_host = env['RECORD_HOST']
        self.live_host = env['WARCSERVER_HOST']
        # fall back to the live host when no proxy host is configured
        self.replay_host = env.get('WARCSERVER_PROXY_HOST') or self.live_host

        self.wam_loader = WAMLoader()
        self._init_client_archive_info()

        self.dyn_stats = DynStats(self.redis, app_config)

    def _init_client_archive_info(self):
        self.client_archives = {}
        for pk, archive in self.wam_loader.replay_info.items():
            info = {'name': archive['name'],
                    'about': archive['about'],
                    'prefix': archive['replay_prefix'],
                   }
            if archive.get('parse_collection'):
                info['parse_collection'] = True

            self.client_archives[pk] = info

    def get_csp_header(self):
        """Return the Content-Security-Policy header value as a string.

        The ``default-src`` source list additionally gets
        ``<app_host>/_set_session`` when an app host is set and differs from
        the content host, and the content error redirect URL when one is
        configured.  ``form-action 'self'`` is always appended.
        """
        pieces = ["default-src 'unsafe-eval' 'unsafe-inline' 'self' data: blob: mediastream: ws: wss: "]

        if self.app_host and self.content_host != self.app_host:
            pieces.append(self.app_host + '/_set_session')

        if self.content_error_redirect:
            pieces.append(' ' + self.content_error_redirect)

        pieces.append("; form-action 'self'")
        return ''.join(pieces)

    def init_routes(self):
        """Register every route handled by this controller on ``self.app``.

        Route groups, in registration order: client archive listing, remote
        browser creation/update, "new recording" redirects and API, cookie
        API, proxy endpoints, embed routes, record/patch/extract content
        routes, replay routes and the session hand-off endpoints
        (``/_set_session`` and ``/_clear_session``).

        Registration order is kept as is: the generic
        ``/<user>/<coll>/<wb_url:path>`` replay route is registered after the
        more specific content routes.
        """
        wr_api_spec.set_curr_tag('External Archives')

        @self.app.get('/api/v1/client_archives')
        def get_client_archives():
            return self.client_archives

        wr_api_spec.set_curr_tag('Browsers')

        @self.app.get('/api/v1/create_remote_browser')
        def create_browser():
            """ Api to launch remote browser instances
            """
            sesh = self.get_session()

            if sesh.is_new() and self.is_content_request():
                self._raise_error(403, 'invalid_browser_request')

            browser_id = request.query['browser']

            Stats(self.redis).incr_browser(browser_id)

            user = self.get_user(redir_check=False)

            data = request.query

            coll_name = data.getunicode('coll', '')
            rec = data.get('rec', '')

            mode = data.get('mode', '')

            url = data.getunicode('url', '')
            timestamp = data.get('timestamp', '')

            sources = ''
            inv_sources = ''
            patch_rec = ''

            collection = user.get_collection_by_name(coll_name)

            # Validate the collection before using it: looking up the
            # recording on a missing collection would otherwise fail with an
            # AttributeError instead of the intended 404.
            if not collection:
                return self._raise_error(404, 'no_such_collection')

            recording = collection.get_recording(rec)

            if mode == 'extract':
                # Extract from All, Patch from None
                sources = '*'
                inv_sources = '*'
            elif mode.startswith('extract:'):
                # Extract from One, Patch from all but one
                sources = mode.split(':', 1)[1]
                inv_sources = sources
                # load patch recording also
                if recording:
                    patch_rec = recording.get_prop('patch_rec')

                mode = 'extract'
            elif mode.startswith('extract_only:'):
                # Extract from one only, no patching
                sources = mode.split(':', 1)[1]
                inv_sources = '*'
                mode = 'extract'

            if mode in self.MODIFY_MODES:
                # writing modes need an existing recording
                if not recording:
                    return self._raise_error(404, 'no_such_recording')

            elif mode in ('replay', 'replay-coll'):
                # replay modes address all recordings
                rec = '*'
            else:
                return self._raise_error(400, 'invalid_mode')

            browser_can_write = '1' if self.access.can_write_coll(collection) else '0'

            remote_ip = self._get_remote_ip()

            # build kwargs
            kwargs = dict(user=user.name,
                          id=sesh.get_id(),
                          coll=collection.my_id,
                          rec=rec,
                          coll_name=quote(coll_name),

                          type=mode,
                          sources=sources,
                          inv_sources=inv_sources,
                          patch_rec=patch_rec,

                          remote_ip=remote_ip,
                          ip=remote_ip,

                          browser=browser_id,
                          url=url,
                          request_ts=timestamp,

                          browser_can_write=browser_can_write)

            data = self.browser_mgr.request_new_browser(kwargs)

            if 'error_message' in data:
                self._raise_error(400, data['error_message'])

            return data

        # UPDATE REMOTE BROWSER CONFIG
        @self.app.get('/api/v1/update_remote_browser/<reqid>')
        def update_remote_browser(reqid):
            user, collection = self.load_user_coll(api=True)

            timestamp = request.query.getunicode('timestamp')
            type_ = request.query.getunicode('type')

            # if switching mode, need to have write access
            # for timestamp, only read access
            if type_:
                self.access.assert_can_write_coll(collection)
            else:
                self.access.assert_can_read_coll(collection)

            return self.browser_mgr.update_remote_browser(reqid,
                                                          type_=type_,
                                                          timestamp=timestamp)

        # REDIRECTS
        @self.app.route('/record/<wb_url:path>', method='ANY')
        def redir_new_temp_rec(wb_url):
            coll_name = 'temp'
            rec_title = self.DEF_REC_NAME
            wb_url = self.add_query(wb_url)
            return self.do_create_new_and_redir(coll_name, rec_title, wb_url, 'record')

        @self.app.route('/$record/<coll_name>/<rec_title>/<wb_url:path>', method='ANY')
        def redir_new_record(coll_name, rec_title, wb_url):
            wb_url = self.add_query(wb_url)
            return self.do_create_new_and_redir(coll_name, rec_title, wb_url, 'record')

        # API NEW
        wr_api_spec.set_curr_tag('Recordings')

        @self.app.post('/api/v1/new')
        def api_create_new():
            self.redir_host()

            url = request.json.get('url')
            coll = request.json.get('coll')
            mode = request.json.get('mode')

            desc = request.json.get('desc', '')

            browser = request.json.get('browser')
            # a remote browser request is never served from the content host
            is_content = request.json.get('is_content') and not browser
            timestamp = request.json.get('timestamp')

            wb_url = self.construct_wburl(url, timestamp, browser, is_content)

            host = self.content_host if is_content else self.app_host
            if not host:
                host = request.urlparts.netloc

            full_url = request.environ['wsgi.url_scheme'] + '://' + host

            url, rec, patch_rec = self.do_create_new(coll, '', wb_url, mode, desc=desc)

            full_url += url

            return {'url': full_url,
                    'rec_name': rec,
                    'patch_rec_name': patch_rec
                   }

        # COOKIES
        wr_api_spec.set_curr_tag('Cookies')

        @self.app.post('/api/v1/auth/cookie')
        def add_cookie():
            user, collection = self.load_user_coll()

            data = request.json or {}

            rec_name = data.get('rec', '*')
            recording = collection.get_recording(rec_name)

            name = data.get('name')
            value = data.get('value')
            domain = data.get('domain')

            if not domain:
                return self._raise_error(400, 'domain_missing')

            self.add_cookie(user, collection, recording, name, value, domain)

            return {'success': domain}

        # PROXY
        @self.app.route('/_proxy/<url:path>', method='ANY')
        def do_proxy(url):
            return self.do_proxy(url)

        # PROXY with CORS
        @self.app.route('/proxy-fetch/<url:path>', method='GET')
        def do_proxy_fetch_cors(url):
            res = self.do_proxy(url)

            # only add the CORS headers when the request carries an Origin
            if 'HTTP_ORIGIN' in request.environ:
                self.set_options_headers(None, None, res)

            return res

        # LIVE DEBUG
        # NOTE: the route decorator is commented out, so this handler is
        # defined but never registered.
        #@self.app.route('/live/<wb_url:path>', method='ANY')
        def live(wb_url):
            request.path_shift(1)

            return self.handle_routing(wb_url, user='******', coll='temp', rec='', type='live')

        # EMBED
        @self.app.route('/_embed/<user>/<coll>/<wb_url:path>', method='ANY')
        def embed_replay(user, coll, wb_url):
            request.path_shift(3)
            return self.handle_routing(wb_url, user, coll, '*', type='replay-coll',
                                       is_embed=True)

        # DISPLAY
        # NOTE: same function name as the handler above; each is registered
        # by its decorator at definition time, so both routes stay active.
        @self.app.route('/_embed_noborder/<user>/<coll>/<wb_url:path>', method='ANY')
        def embed_replay(user, coll, wb_url):
            request.path_shift(3)
            return self.handle_routing(wb_url, user, coll, '*', type='replay-coll',
                                       is_embed=True, is_display=True)

        # CONTENT ROUTES
        # Record
        @self.app.route('/<user>/<coll>/<rec>/record/<wb_url:path>', method='ANY')
        def do_record(user, coll, rec, wb_url):
            request.path_shift(4)

            return self.handle_routing(wb_url, user, coll, rec, type='record', redir_route='record')

        # Patch
        @self.app.route('/<user>/<coll>/<rec>/patch/<wb_url:path>', method='ANY')
        def do_patch(user, coll, rec, wb_url):
            request.path_shift(4)

            return self.handle_routing(wb_url, user, coll, rec, type='patch', redir_route='patch')

        # Extract
        # Raw strings keep the '\:' in the route pattern without relying on
        # an invalid string escape.
        @self.app.route(r'/<user>/<coll>/<rec>/extract\:<archive>/<wb_url:path>', method='ANY')
        def do_extract_patch_archive(user, coll, rec, wb_url, archive):
            request.path_shift(4)

            return self.handle_routing(wb_url, user, coll, rec, type='extract',
                                       sources=archive,
                                       inv_sources=archive,
                                       redir_route='extract:' + archive)

        @self.app.route(r'/<user>/<coll>/<rec>/extract_only\:<archive>/<wb_url:path>', method='ANY')
        def do_extract_only_archive(user, coll, rec, wb_url, archive):
            request.path_shift(4)

            return self.handle_routing(wb_url, user, coll, rec, type='extract',
                                       sources=archive,
                                       inv_sources='*',
                                       redir_route='extract_only:' + archive)

        @self.app.route('/<user>/<coll>/<rec>/extract/<wb_url:path>', method='ANY')
        def do_extract_all(user, coll, rec, wb_url):
            request.path_shift(4)

            return self.handle_routing(wb_url, user, coll, rec, type='extract',
                                       sources='*',
                                       inv_sources='*',
                                       redir_route='extract')

        # REPLAY
        # Replay List
        @self.app.route('/<user>/<coll>/list/<list_id>/<bk_id>/<wb_url:path>', method='ANY')
        def do_replay_rec(user, coll, list_id, bk_id, wb_url):
            request.path_shift(5)

            return self.handle_routing(wb_url, user, coll, '*', type='replay-coll')

        # Replay Recording
        @self.app.route('/<user>/<coll>/<rec>/replay/<wb_url:path>', method='ANY')
        def do_replay_rec(user, coll, rec, wb_url):
            request.path_shift(4)

            return self.handle_routing(wb_url, user, coll, rec, type='replay')

        # Replay Coll
        @self.app.route('/<user>/<coll>/<wb_url:path>', method='ANY')
        def do_replay_coll(user, coll, wb_url):
            request.path_shift(2)

            return self.handle_routing(wb_url, user, coll, '*', type='replay-coll')

        # Session redir
        @self.app.get(['/_set_session'])
        def set_sesh():
            sesh = self.get_session()

            if self.is_content_request():
                # content host: adopt the session from the passed cookie,
                # then continue to the requested path
                cookie = request.query.getunicode('cookie')
                sesh.set_id_from_cookie(cookie)
                return self.redirect(request.query.getunicode('path'))

            else:
                url = request.environ['wsgi.url_scheme'] + '://' + self.content_host
                self.set_options_headers(self.content_host, self.app_host)
                response.headers['Cache-Control'] = 'no-cache'

                cookie = request.query.getunicode('webrec.sesh_cookie')

                # otherwise, check if content cookie provided
                # already have same session, just redirect back
                # likely a real 404 not found
                if sesh.is_same_session(request.query.getunicode('content_cookie')):
                    redirect(url + request.query.getunicode('path'))

                # if anon, ensure session is persisted before setting content session
                # generate cookie to pass
                if not cookie:
                    self.access.init_session_user(persist=True)
                    cookie = sesh.get_cookie()

                cookie = quote(cookie)
                url += '/_set_session?{0}&cookie={1}'.format(request.environ['QUERY_STRING'], cookie)
                redirect(url)

        # OPTIONS
        @self.app.route('/_set_session', method='OPTIONS')
        def set_sesh_options():
            self.set_options_headers(self.content_host, self.app_host)
            return ''

        @self.app.route('/_clear_session', method='OPTIONS')
        def set_clear_options():
            self.set_options_headers(self.app_host, self.content_host)
            return ''

        # CLEAR CONTENT SESSION
        @self.app.get(['/_clear_session'])
        def clear_sesh():
            self.set_options_headers(self.app_host, self.content_host)
            response.headers['Cache-Control'] = 'no-cache'

            if not self.is_content_request():
                self._raise_error(400, 'invalid_request')

            try:
                # delete session (will update cookie)
                self.get_session().delete()
                return {'success': 'logged_out'}

            except Exception:
                # any failure while deleting is reported as a bad request
                self._raise_error(400, 'invalid_request')

    def do_proxy(self, url):
        """Render ``url`` for a request coming from a containerized browser.

        The browser manager supplies the session info dict, which is also
        used as the template ``kwargs``.  A missing session is reported as a
        400 through ``_raise_error``.  Any exception raised while rendering
        is printed and turned into a ``content_error.html`` page.
        """
        info = self.browser_mgr.init_cont_browser_sesh()
        if not info:
            return self._raise_error(400, 'invalid_connection_source')

        try:
            kwargs = info
            session_user = info['the_user']
            session_coll = info['collection']
            # not used below; the lookup is kept so a missing key still
            # ends up in the error page rendered by the handler
            _session_rec = info['recording']

            if kwargs['type'] == 'replay-coll':
                session_coll.sync_coll_index(exists=False, do_async=False)

            url = self.add_query(url)
            kwargs['url'] = url

            replay_url = kwargs.get('request_ts', '') + 'bn_/' + url

            request.environ['webrec.template_params'] = kwargs

            client_ip = info.get('remote_ip')
            if client_ip and info['type'] in self.MODIFY_MODES:
                kwargs['ip'] = self.check_rate_limit(session_user, client_ip)

            rendered = self.render_content(replay_url, kwargs, request.environ)

            return HTTPResponse(body=rendered.body,
                                status=rendered.status_headers.statusline,
                                headers=rendered.status_headers.headers)

        except Exception as exc:
            import traceback
            traceback.print_exc()

            @self.jinja2_view('content_error.html')
            def handle_error(code, body, env):
                response.status = code
                kwargs['url'] = url
                kwargs['status'] = code
                kwargs['err_body'] = body
                kwargs['host_prefix'] = self.get_host_prefix(env)
                kwargs['proxy_magic'] = env.get('wsgiprox.proxy_host', '')
                return kwargs

            # default to 500 when the exception carries no status code
            status_code = getattr(exc, 'status_code', 500)

            # prefer 'body', then 'msg', else an empty error body
            for attr in ('body', 'msg'):
                if hasattr(exc, attr):
                    err_body = getattr(exc, attr)
                    break
            else:
                err_body = ''

            return handle_error(status_code, err_body, request.environ)

    def check_remote_archive(self, wb_url, mode, wb_url_obj=None):
        """Check whether ``wb_url`` points into a known remote archive.

        Returns ``None`` when ``self.wam_loader`` finds no archive for the
        url.  Otherwise returns ``('extract:<archive id>', new_wb_url)``,
        where the new url keeps the modifier of the original one.  The
        incoming ``mode`` value is not consulted.
        """
        parsed = wb_url_obj or WbUrl(wb_url)

        found = self.wam_loader.find_archive_for_url(parsed.url)
        if not found:
            return

        _pk, archive_url, archive_id = found

        extract_mode = 'extract:' + archive_id
        rewritten_url = WbUrl(archive_url).to_str(mod=parsed.mod)

        return extract_mode, rewritten_url

    def do_create_new_and_redir(self, coll_name, rec_name, wb_url, mode):
        """Create a new recording via ``do_create_new`` and redirect to its url."""
        target_url, _rec_id, _patch_rec_id = self.do_create_new(coll_name, rec_name, wb_url, mode)
        return self.redirect(target_url)

    def do_create_new(self, coll_name, rec_title, wb_url, mode, desc=''):
        """Create a recording (and collection, if needed) for a new session.

        In ``record`` mode the url is first checked against the remote
        archives; a match switches ``mode`` and ``wb_url`` to the extract
        form.  Anonymous users always use the ``temp`` collection; when
        anonymous recording is disabled a message is flashed, a redirect to
        ``/`` is issued and nothing is returned.  For ``extract:`` modes a
        companion patch recording is created and linked.

        Returns ``(new_url, recording id, patch recording id or '')``.
        """
        if mode == 'record':
            remote = self.check_remote_archive(wb_url, mode)
            if remote:
                mode, wb_url = remote

        user = self.access.init_session_user()

        if not user.is_anon():
            coll_title = coll_name
            coll_name = self.sanitize_title(coll_title)
        elif self.anon_disabled:
            self.flash_message('Sorry, anonymous recording is not available.')
            self.redirect('/')
            return
        else:
            coll_name = 'temp'
            coll_title = 'Temporary Collection'

        # reuse the collection when it exists, otherwise create it
        collection = (user.get_collection_by_name(coll_name) or
                      user.create_collection(coll_name, title=coll_title))

        recording = self._create_new_rec(collection, rec_title, mode, desc=desc)

        patch_rec_name = ''
        if mode.startswith('extract:'):
            patch_title = self.patch_of_name(recording['title'])
            patch_recording = self._create_new_rec(collection, patch_title, 'patch')

            recording.set_patch_recording(patch_recording)

            patch_rec_name = patch_recording.my_id

        url_parts = dict(user=user.my_id,
                         coll=collection.name,
                         rec=recording.name,
                         mode=mode,
                         url=wb_url)
        new_url = '/{user}/{coll}/{rec}/{mode}/{url}'.format(**url_parts)

        return new_url, recording.my_id, patch_rec_name

    def redir_set_session(self):
        """Redirect to ``/_set_session`` with the current path and query quoted
        into its ``path`` parameter."""
        env = request.environ
        current_path = self.add_query(env['SCRIPT_NAME'] + env['PATH_INFO'])
        self.redir_host(None, '/_set_session?path=' + quote(current_path))

    def _create_new_rec(self, collection, title, mode, desc=''):
        #rec_name = self.sanitize_title(title) if title else ''
        rec_type = 'patch' if mode == 'patch' else None
        return collection.create_recording(title=title,
                                           desc=desc,
                                           rec_type=rec_type)

    def patch_of_name(self, name):
        """Return the title used for the patch recording of ``name``."""
        prefix = 'Patch of '
        return prefix + name

    def handle_routing(self, wb_url, user, coll_name, rec_name, type,
                       is_embed=False,
                       is_display=False,
                       sources='',
                       inv_sources='',
                       redir_route=None):
        """Validate a content request and render it through the rewriter.

        :param wb_url: url part of the request path (rebuilt via ``_full_url``)
        :param user: user name from the route; ``'_new'`` together with
            ``redir_route`` creates a new recording and redirects to it
        :param coll_name: collection name from the route
        :param rec_name: recording name from the route, or ``'*'``
        :param type: one of the ``MODIFY_MODES`` or ``'replay'`` /
            ``'replay-coll'`` (``'live'`` is passed by the debug handler)
        :param is_embed: passed through to the template kwargs
        :param is_display: passed through to the template kwargs
        :param sources: passed through to the template kwargs
        :param inv_sources: passed through; when set and not ``'*'`` the
            recording's patch recording is looked up
        :param redir_route: mode to use when a new recording must be created

        Returns an ``HTTPResponse`` built from the rendered content, or the
        result of a redirect / error page.  The error helpers
        (``_raise_error``, ``redir_set_session``, ``_redir_if_sanitized``)
        are called without ``return`` -- presumably they abort the request
        by raising; confirm against their definitions.
        """

        wb_url = self._full_url(wb_url)
        if user == '_new' and redir_route:
            return self.do_create_new_and_redir(coll_name, rec_name, wb_url, redir_route)

        sesh = self.get_session()

        remote_ip = None
        frontend_cache_header = None
        patch_recording = None

        the_user, collection, recording = self.user_manager.get_user_coll_rec(user, coll_name, rec_name)

        if not the_user:
            msg = 'not_found' if user == 'api' else 'no_such_user'
            self._raise_error(404, msg)

        # ids used in the template kwargs; None when the object is missing
        coll = collection.my_id if collection else None
        rec = recording.my_id if recording else None

        # writing modes: need a session, an open recording, write access,
        # free space and a rate-limit check
        if type in self.MODIFY_MODES:
            if sesh.is_new() and self.is_content_request():
                self.redir_set_session()

            if not recording:
                self._redir_if_sanitized(self.sanitize_title(rec_name),
                                         rec_name,
                                         wb_url)

                # don't auto create recording for inner frame w/o accessing outer frame
                self._raise_error(404, 'no_such_recording')

            elif not recording.is_open():
                # force creation of new recording as this one is closed
                self._raise_error(400, 'recording_not_open')

            collection.access.assert_can_write_coll(collection)

            if the_user.is_out_of_space():
                self._raise_error(402, 'out_of_space')

            remote_ip = self._get_remote_ip()

            # check_rate_limit returns the ip to keep using, or '' when
            # no rate limit applies
            remote_ip = self.check_rate_limit(the_user, remote_ip)

            if inv_sources and inv_sources != '*':
                #patch_rec_name = self.patch_of_name(rec, True)
                patch_recording = recording.get_patch_recording()
                #patch_recording = collection.get_recording_by_name(patch_rec_name)

        # replay modes: need a readable collection (and a recording for
        # single-recording replay)
        if type in ('replay-coll', 'replay'):
            if not collection:
                self._redir_if_sanitized(self.sanitize_title(coll_name),
                                         coll_name,
                                         wb_url)

                # a new session on the content host is first sent through
                # /_set_session before reporting a missing collection
                if sesh.is_new() and self.is_content_request():
                    self.redir_set_session()
                else:
                    self._raise_error(404, 'no_such_collection')

            access = self.access.check_read_access_public(collection)

            if not access:
                if sesh.is_new() and self.is_content_request():
                    self.redir_set_session()
                else:
                    self._raise_error(404, 'no_such_collection')

            # anything but public access is marked private for caches
            if access != 'public':
                frontend_cache_header = ('Cache-Control', 'private')

            if type == 'replay':
                if not recording:
                    self._raise_error(404, 'no_such_recording')

        request.environ['SCRIPT_NAME'] = quote(request.environ['SCRIPT_NAME'], safe='/:')

        wb_url = self._context_massage(wb_url)

        wb_url_obj = WbUrl(wb_url)

        # top frame: the frame modifier or a containerized-browser modifier
        is_top_frame = (wb_url_obj.mod == self.frame_mod or wb_url_obj.mod.startswith('$br:'))

        if type == 'record' and is_top_frame:
            # recording a url that belongs to a known remote archive is
            # redirected to the matching extract route
            result = self.check_remote_archive(wb_url, type, wb_url_obj)
            if result:
                mode, wb_url = result
                new_url = '/{user}/{coll}/{rec}/{mode}/{url}'.format(user=user,
                                                                     coll=coll_name,
                                                                     rec=rec_name,
                                                                     mode=mode,
                                                                     url=wb_url)
                return self.redirect(new_url)

        elif type == 'replay-coll' and not is_top_frame:
            collection.sync_coll_index(exists=False, do_async=False)

        # parameters handed to render_content
        kwargs = dict(user=user,
                      id=sesh.get_id(),
                      coll=coll,
                      rec=rec,
                      coll_name=quote(coll_name),
                      rec_name=quote(rec_name, safe='/*'),

                      the_user=the_user,
                      collection=collection,
                      recording=recording,
                      patch_recording=patch_recording,

                      type=type,
                      sources=sources,
                      inv_sources=inv_sources,
                      patch_rec=patch_recording.my_id if patch_recording else None,
                      ip=remote_ip,
                      is_embed=is_embed,
                      is_display=is_display)

        # top-frame replay but through a proxy, redirect to original
        if is_top_frame and 'wsgiprox.proxy_host' in request.environ:
            kwargs['url'] = wb_url_obj.url
            kwargs['request_ts'] = wb_url_obj.timestamp
            self.browser_mgr.update_local_browser(kwargs)
            return redirect(wb_url_obj.url)

        try:
            self.check_if_content(wb_url_obj, request.environ, is_top_frame)

            resp = self.render_content(wb_url, kwargs, request.environ)

            if frontend_cache_header:
                resp.status_headers.headers.append(frontend_cache_header)

            # convert the rewriter response into a bottle HTTPResponse
            resp = HTTPResponse(body=resp.body,
                                status=resp.status_headers.statusline,
                                headers=resp.status_headers.headers)

            return resp

        except UpstreamException as ue:
            # context for the error page / error redirect
            err_context = {
                'url': ue.url,
                'status': ue.status_code,
                'error': ue.msg.get('error'),
                'timestamp': wb_url_obj.timestamp if wb_url_obj else '',
                'user': user,
                'coll': coll_name,
                'rec': rec_name,
                'type': type,
                'app_host': self.app_host,
            }

            @self.jinja2_view('content_error.html')
            def handle_error(error):
                response.status = ue.status_code
                return error

            # either redirect (307) to the configured error page with the
            # context as query string, or render the local error template
            if self.content_error_redirect:
                return redirect(self.content_error_redirect + '?' + urlencode(err_context), code=307)
            else:
                return handle_error(err_context)

    def check_if_content(self, wb_url, environ, is_top_frame):
        """Redirect replay requests that arrived on the wrong host.

        Does nothing unless ``wb_url`` is a replay url and a content host is
        configured.  Top-frame requests on the content host are redirected
        to the app host; non-top-frame requests that are not on the content
        host are redirected to it.
        """
        if not wb_url.is_replay() or not self.content_host:
            return

        on_content_host = self.is_content_request()

        if is_top_frame:
            if on_content_host:
                self.redir_host(self.app_host)
        elif not on_content_host:
            self.redir_host(self.content_host)

    def _filter_headers(self, type, status_headers):
        if type in ('replay', 'replay-coll'):
            new_headers = []
            for name, value in status_headers.headers:
                if name.lower() != 'set-cookie':
                    new_headers.append((name, value))

            status_headers.headers = new_headers

    def _inject_nocache_headers(self, status_headers, kwargs):
        if 'browser_id' in kwargs:
            status_headers.headers.append(
                ('Cache-Control', 'no-cache, no-store, max-age=0, must-revalidate')
            )

    def _redir_if_sanitized(self, id, title, wb_url):
        if id != title:
            target = request.script_name.replace(title, id)
            target += wb_url
            self.redirect(target)

    def _context_massage(self, wb_url):
        """Adjust the request environ and ``wb_url`` before rendering.

        * restores ``HTTP_COOKIE`` from ``webrec.request_cookie`` when set
        * removes ``HTTP_X_PUSH_STATE_REQUEST`` from the environ if present
        * strips ``&spf=navigate`` from YouTube ``mp_/`` urls that end in it

        Returns the (possibly modified) ``wb_url``.
        """
        environ = request.environ

        # reset HTTP_COOKIE to guarded request_cookie for LiveRewriter
        if 'webrec.request_cookie' in environ:
            environ['HTTP_COOKIE'] = environ['webrec.request_cookie']

        # pop() with a default removes the key when present without the
        # bare ``except`` that previously hid every kind of error
        environ.pop('HTTP_X_PUSH_STATE_REQUEST', None)

        #TODO: generalize
        if wb_url.endswith('&spf=navigate') and wb_url.startswith('mp_/https://www.youtube.com'):
            wb_url = wb_url.replace('&spf=navigate', '')

        return wb_url

    def add_query(self, url):
        """Return ``url`` with the current request's query string appended
        after a ``?``; ``url`` is returned unchanged when there is none."""
        query = request.query_string
        if not query:
            return url

        return url + '?' + query

    def _full_url(self, url=''):
        """Return the url part of the current request, query string included.

        When ``REQUEST_URI`` is set and starts with ``SCRIPT_NAME + '/'``,
        the remainder of ``REQUEST_URI`` is returned as is.  Otherwise the
        given ``url`` (or ``SCRIPT_NAME + PATH_INFO`` when ``url`` is empty)
        is returned with the request's query string appended.
        """
        environ = request.environ

        request_uri = environ.get('REQUEST_URI')
        script_name = environ.get('SCRIPT_NAME', '') + '/'

        if request_uri and request_uri.startswith(script_name):
            return request_uri[len(script_name):]

        if not url:
            # previously read from the undefined name ``environ.request``,
            # which raised NameError whenever this fallback was reached
            url = environ['SCRIPT_NAME'] + environ['PATH_INFO']

        return self.add_query(url)

    def get_cookie_key(self, kwargs):
        """Return the cookie key from ``self.dyn_stats`` for the user,
        collection and recording in ``kwargs`` and the current session id."""
        session_id = self.get_session().get_id()

        owner = kwargs['the_user']
        collection = kwargs['collection']
        recording = kwargs['recording']

        return self.dyn_stats.get_cookie_key(owner, collection, recording,
                                             sesh_id=session_id)

    def add_cookie(self, user, collection, recording, name, value, domain):
        """Store a cookie in ``self.cookie_tracker`` under the key that
        ``self.dyn_stats`` gives for this user, collection, recording and the
        current session id."""
        session_id = self.get_session().get_id()
        cookie_key = self.dyn_stats.get_cookie_key(user, collection, recording,
                                                   sesh_id=session_id)

        self.cookie_tracker.add_cookie(cookie_key, domain, name, value)

    def _get_remote_ip(self):
        """Return the client address with everything from the last ``.`` on
        removed.

        ``HTTP_X_REAL_IP`` is preferred over ``REMOTE_ADDR``; an empty
        string is used when neither is set.
        """
        environ = request.environ
        address = environ.get('HTTP_X_REAL_IP') or environ.get('REMOTE_ADDR', '')
        return address.rsplit('.', 1)[0]

    def check_rate_limit(self, user, remote_ip):
        """Check the rate limit for ``user`` and return the ip to keep using.

        ``user.is_rate_limited(remote_ip)`` decides the outcome:

        * equal to ``True``  -- a 402 ``rate_limit_exceeded`` error is reported
        * equal to ``None``  -- no rate limit at all; ``''`` is returned so
          the rate counter is not incremented for this request
        * anything else      -- ``remote_ip`` is returned unchanged
        """
        limited = user.is_rate_limited(remote_ip)

        if limited == True:
            self._raise_error(402, 'rate_limit_exceeded')
            return None

        if limited == None:
            return ''

        return remote_ip

    ## RewriterApp overrides
    def get_base_url(self, wb_url, kwargs):
        """Return the upstream url for the request's mode.

        The template ``self.paths[kwargs['type']]`` is formatted with the
        record, replay and live hosts plus every entry of ``kwargs``.
        """
        mode = kwargs['type']
        template = self.paths[mode]

        return template.format(record_host=self.record_host,
                               replay_host=self.replay_host,
                               live_host=self.live_host,
                               **kwargs)

    def process_query_cdx(self, cdx, wb_url, kwargs):
        """Set ``cdx['rec']``.

        The value is ``kwargs['rec']`` when that is set and not ``'*'``;
        otherwise it is the part of ``cdx['source']`` before the first ``:``.
        """
        requested = kwargs.get('rec')

        if requested and requested != '*':
            cdx['rec'] = requested
        else:
            cdx['rec'] = cdx['source'].partition(':')[0]

    def get_host_prefix(self, environ):
        """Return ``<scheme>://<content host>`` for non-proxy requests when a
        content host is configured; otherwise defer to the parent class."""
        via_proxy = 'wsgiprox.proxy_host' in environ

        if not self.content_host or via_proxy:
            return super(ContentController, self).get_host_prefix(environ)

        return environ['wsgi.url_scheme'] + '://' + self.content_host

    def get_top_url(self, full_prefix, wb_url, cdx, kwargs):
        """Compute the top-frame url, pointing it at the app host if needed.

        For any modifier other than the frame modifier, and only when the
        content host differs from the app host, the content host in
        ``full_prefix`` is replaced by the app host before delegating to
        the parent implementation.
        """
        if wb_url.mod != self.frame_mod:
            if self.content_host != self.app_host:
                full_prefix = full_prefix.replace(self.content_host,
                                                  self.app_host)

        parent = super(ContentController, self)
        return parent.get_top_url(full_prefix, wb_url, cdx, kwargs)

    def get_top_frame_params(self, wb_url, kwargs):
        """Assemble the parameters handed to the top-frame template.

        'live' requests only get the mode, the embed/display flags and the
        top prefix. All other request types additionally carry the content
        info from ``get_content_inject_info`` plus the user, collection,
        recording and source details found in ``kwargs``.
        """
        mode = kwargs['type']

        environ = request.environ
        top_prefix = super(ContentController, self).get_host_prefix(environ)
        top_prefix += self.get_rel_prefix(environ)

        if mode != 'live':
            # refresh cookie expiration,
            # disable until can guarantee cookie is not changed!
            #self.get_session().update_expires()

            info = self.get_content_inject_info(kwargs['the_user'],
                                                kwargs['collection'],
                                                kwargs['recording'])

            return {
                'info': info,
                'curr_mode': mode,

                'user': kwargs['user'],

                'coll': kwargs['coll'],
                'coll_name': kwargs['coll_name'],
                'coll_title': info.get('coll_title', ''),

                'rec': kwargs['rec'],
                'rec_name': kwargs['rec_name'],
                'rec_title': info.get('rec_title', ''),

                'is_embed': kwargs.get('is_embed'),
                'is_display': kwargs.get('is_display'),

                'top_prefix': top_prefix,

                'sources': kwargs.get('sources'),
                'inv_sources': kwargs.get('inv_sources'),
            }

        return {
            'curr_mode': mode,
            'is_embed': kwargs.get('is_embed'),
            'is_display': kwargs.get('is_display'),
            'top_prefix': top_prefix,
        }

    def _add_custom_params(self, cdx, resp_headers, kwargs, record):
        try:
            self._add_stats(cdx, resp_headers, kwargs, record)
        except:
            import traceback
            traceback.print_exc()

    def _add_stats(self, cdx, resp_headers, kwargs, record):
        """Update replay-size and dynamic source stats for one served record.

        :param cdx: index entry of the record; 'source', 'orig_source_id'
                    and 'url' are read from it
        :param resp_headers: response headers; 'Recorder-Skip' and
                             'Recorder-Rec' are read in modify modes
        :param kwargs: request params; 'type' is required, 'user', 'rec',
                       'recording' and 'patch_recording' are read as needed
        :param record: loaded record whose 'Content-Length' record header is
                       counted for 'replay-coll' requests
        """
        type_ = kwargs['type']

        # collection replay: count the replayed bytes for the user
        if type_ == 'replay-coll':
            content_len = record.rec_headers.get_header('Content-Length')
            if content_len is not None:
                Stats(self.redis).incr_replay(int(content_len), kwargs['user'])

        # no dynamic source stats for 'record' and 'live' requests
        if type_ in ('record', 'live'):
            return

        source = cdx.get('source')
        if not source:
            return

        # a 'local' source is reported as 'replay'
        if source == 'local':
            source = 'replay'

        # nothing to track when a patch request was served from replay
        if source == 'replay' and type_ == 'patch':
            return

        # an original source id, when present, overrides the source
        orig_source = cdx.get('orig_source_id')
        if orig_source:
            source = orig_source

        ra_rec = None
        ra_recording = None

        # set source in recording-key
        if type_ in self.MODIFY_MODES:
            skip = resp_headers.get('Recorder-Skip')

            if not skip and source not in ('live', 'replay'):
                # recording id from the 'Recorder-Rec' header, falling back
                # to the requested recording
                ra_rec = unquote(resp_headers.get('Recorder-Rec', ''))
                ra_rec = ra_rec or kwargs['rec']

                recording = kwargs.get('recording')
                patch_recording = kwargs.get('patch_recording')

                # resolve the id to the matching recording object, if any
                if recording and ra_rec == recording.my_id:
                    ra_recording = recording
                elif patch_recording and ra_rec == patch_recording.my_id:
                    ra_recording = patch_recording

        url = cdx.get('url')
        referrer = request.environ.get('HTTP_REFERER')

        # use the url itself as referrer when there is none, or when (outside
        # proxy mode) the referrer contains the request's own host
        if not referrer:
            referrer = url
        elif ('wsgiprox.proxy_host' not in request.environ and
            request.environ.get('HTTP_HOST') in referrer):
            referrer = url

        self.dyn_stats.update_dyn_stats(url, kwargs, referrer, source, ra_recording)

    def handle_custom_response(self, environ, wb_url, full_prefix, host_prefix, kwargs):
        """Route requests for a containerized browser to the embed handler.

        A modifier starting with '$br:' selects a containerized browser and
        is served by ``handle_browser_embed``; every other request is left
        to ``RewriterApp.handle_custom_response``.
        """
        wants_browser = wb_url.mod.startswith('$br:')

        if not wants_browser:
            return RewriterApp.handle_custom_response(self, environ, wb_url, full_prefix, host_prefix, kwargs)

        return self.handle_browser_embed(wb_url, kwargs)

    def handle_browser_embed(self, wb_url, kwargs):
        """Request a containerized browser and render the embed page for it.

        ``kwargs`` is updated in place with the write-access flag, remote
        ip, url, timestamp and browser id before it is passed to
        ``self.browser_mgr.request_new_browser``. An 'error_message' in the
        browser manager's reply is reported as a 400 error.
        """
        # the modifier has the form '$br:<browser id>'
        requested_browser = wb_url.mod.split(':', 1)[1]

        if self.access.can_write_coll(kwargs['collection']):
            kwargs['browser_can_write'] = '1'
        else:
            kwargs['browser_can_write'] = '0'

        kwargs['remote_ip'] = self._get_remote_ip()

        kwargs['url'] = wb_url.url
        kwargs['timestamp'] = wb_url.timestamp
        kwargs['browser'] = requested_browser

        # container redis info
        template_data = self.browser_mgr.request_new_browser(kwargs)
        if 'error_message' in template_data:
            self._raise_error(400, template_data['error_message'])

        template_data.update(self.get_top_frame_params(wb_url, kwargs))
        template_data['wb_url'] = wb_url

        # the view passes the data straight through to 'browser_embed.html'
        @self.jinja2_view('browser_embed.html')
        def browser_embed(data):
            return data

        return browser_embed(template_data)

    def get_content_inject_info(self, user, collection, recording):
        """Collect the info dict injected into content pages.

        With a recording, its id and size are reported; without one, the
        collection size is reported instead. The collection name, its
        url-quoted title and description, and the user's remaining size
        are always included.
        """
        # recording (or collection size when there is no recording)
        if recording:
            #info['rec_title'] = quote(recording.get_title(), safe='/ ')
            info = {'rec_id': recording.my_id,
                    'size': recording.size}
        else:
            info = {'size': collection.size}

        # collection
        info['coll_id'] = collection.name

        title = collection.get_prop('title', collection.name)
        info['coll_title'] = quote(title, safe='/ ')

        description = collection.get_prop('desc', '')
        info['coll_desc'] = quote(description)

        info['size_remaining'] = user.get_size_remaining()

        return info

    def construct_wburl(self, url, ts, browser, is_content):
        """Prefix ``url`` with a timestamp and/or modifier, if there is one.

        The modifier is '$br:<browser>' when a browser is given, otherwise
        'mp_' for content requests, otherwise nothing. When neither a
        timestamp nor a modifier applies, ``url`` is returned unchanged.
        """
        if browser:
            modifier = '$br:' + browser
        elif is_content:
            modifier = 'mp_'
        else:
            modifier = ''

        prefix = (ts or '') + modifier

        return prefix + '/' + url if prefix else url
Пример #12
0
    def __init__(self):
        """Build the warc server app and register its request routes.

        Reads the webrecorder config plus the REDIS_BASE_URL (required) and
        CACHE_PROXY_URL (optional) environment variables, creates the index
        sources and resource handlers, and mounts them on the '/live',
        '/extract', '/replay', '/replay-coll' and '/patch' routes. The
        resulting app is stored on ``self.app``.
        """
        init_logging()

        config = load_wr_config()

        app = BaseWarcServer(debug=True)

        redis_base = os.environ['REDIS_BASE_URL'] + '/'

        # redis url/key templates from the config
        rec_url = redis_base + config['cdxj_key_templ']
        coll_url = redis_base + config['cdxj_coll_key_templ']
        warc_url = redis_base + config['warc_key_templ']
        rec_list_key = config['rec_list_key_templ']

        # the cache proxy url is also published via the module-level
        # PROXY_PREFIX global
        cache_proxy_url = os.environ.get('CACHE_PROXY_URL', '')
        global PROXY_PREFIX
        PROXY_PREFIX = cache_proxy_url

        # timeout passed to the redis index sources and the aggregators
        timeout = 20.0

        register_source(ProxyMementoIndexSource)
        #register_source(ProxyRemoteIndexSource)

        rec_redis_source = RedisMultiKeyIndexSource(timeout=timeout,
                                                    redis_url=rec_url)

        # the collection source reuses the redis handle of the recording source
        redis = rec_redis_source.redis
        coll_redis_source = RedisMultiKeyIndexSource(
            timeout=timeout,
            redis_url=coll_url,
            redis=redis,
            member_key_templ=rec_list_key)

        live_rec = DefaultResourceHandler(
            SimpleAggregator({'live': LiveIndexSource()}, ), warc_url,
            cache_proxy_url)

        # Extractable archives (all available)
        wam_loader = WAMLoader()
        extractable_archives = wam_loader.all_archives

        # Extract Source
        extractor = GeventTimeoutAggregator(extractable_archives,
                                            timeout=timeout)
        extract_primary = DefaultResourceHandler(extractor, warc_url,
                                                 cache_proxy_url)

        # Patch fallback archives
        fallback_archives = self.filter_archives(
            extractable_archives, config['patch_archives_index'])

        # patch + live
        # NOTE: patch_archives is the same object as fallback_archives (the
        # copy is disabled), so adding 'live' below changes both names
        #patch_archives = fallback_archives.copy()
        patch_archives = fallback_archives
        patch_archives['live'] = LiveIndexSource()

        extractor2 = GeventTimeoutAggregator(patch_archives,
                                             timeout=timeout,
                                             sources_key='inv_sources',
                                             invert_sources=True)

        extract_other = DefaultResourceHandler(extractor2, warc_url,
                                               cache_proxy_url)

        patcher = GeventTimeoutAggregator(patch_archives, timeout=timeout)
        patch_rec = DefaultResourceHandler(patcher, warc_url, cache_proxy_url)

        # Single Rec Replay
        replay_rec = DefaultResourceHandler(rec_redis_source, warc_url,
                                            cache_proxy_url)

        # Coll Replay
        replay_coll = DefaultResourceHandler(coll_redis_source, warc_url,
                                             cache_proxy_url)

        # '/extract' and '/patch' are each served by a sequence of two handlers
        app.add_route('/live', live_rec)
        app.add_route('/extract', HandlerSeq([extract_primary, extract_other]))
        app.add_route('/replay', replay_rec)
        app.add_route('/replay-coll', replay_coll)
        app.add_route('/patch', HandlerSeq([replay_coll, patch_rec]))

        self.app = app
Пример #13
0
class ContentController(BaseController, RewriterApp):
    # recording title used by the '/record/<wb_url>' redirect route
    DEF_REC_NAME = 'Recording Session'

    # Raw string literal: '\d' is not a valid escape sequence in a normal
    # string literal and triggers a deprecation/syntax warning on newer
    # Python versions. The compiled pattern is unchanged.
    WB_URL_RX = re.compile(r'(([\d*]*)([a-z]+_|[$][a-z0-9:.-]+)?/)?([a-zA-Z]+:)?//.*')

    # request types that write into a recording
    MODIFY_MODES = ('record', 'patch', 'extract')

    # static prefix set as 'pywb.static_prefix' for proxied requests
    BUNDLE_PREFIX = '/static/bundle/'

    def __init__(self, *args, **kwargs):
        """Set up the content controller.

        Expects 'config', 'browser_mgr' and 'jinja_env' in ``kwargs`` and
        reads the RECORD_HOST and WARCSERVER_HOST (required) as well as the
        CONTENT_ERROR_REDIRECT, WARCSERVER_PROXY_HOST, SESSION_REDIRECT_HOST
        and REFUSE_PLAYBACK (optional) environment variables.
        """
        BaseController.__init__(self, *args, **kwargs)

        env = os.environ
        config = kwargs['config']

        # must be set before get_csp_header(), which reads it
        self.content_error_redirect = env.get('CONTENT_ERROR_REDIRECT')
        config['csp-header'] = self.get_csp_header()

        self.browser_mgr = kwargs['browser_mgr']

        RewriterApp.__init__(self,
                             framed_replay=True,
                             jinja_env=kwargs['jinja_env'],
                             config=config)

        self.paths = config['url_templates']
        self.cookie_tracker = CookieTracker(self.redis)

        self.record_host = env['RECORD_HOST']
        self.live_host = env['WARCSERVER_HOST']
        # without a dedicated proxy host, replay goes through the live host
        self.replay_host = env.get('WARCSERVER_PROXY_HOST') or self.live_host
        self.session_redirect_host = env.get('SESSION_REDIRECT_HOST')

        self.wam_loader = WAMLoader()
        self._init_client_archive_info()

        self.dyn_stats = DynStats(self.redis, config)

        # BEGIN PERMA CUSTOMIZATION
        # Perma occasionally refuses to play back the content of certain
        # URLs or domains. This is a temporary workaround, until we devise
        # a more universally satisfactory solution.
        blocked = env.get('REFUSE_PLAYBACK', '')
        self.refuse_playback = [entry for entry in blocked.split(',') if entry]
        # END PERMA CUSTOMIZATION

    def _init_client_archive_info(self):
        self.client_archives = {}
        for pk, archive in self.wam_loader.replay_info.items():
            info = {'name': archive['name'],
                    'about': archive['about'],
                    'prefix': archive['replay_prefix'],
                   }
            if archive.get('parse_collection'):
                info['parse_collection'] = True

            self.client_archives[pk] = info

    def get_csp_header(self):
        """Compose the Content-Security-Policy value for content responses.

        The app host's '/_set_session' endpoint is allowed when the app
        host is set and differs from the content host; the content error
        redirect target is allowed when configured.
        """
        pieces = ["default-src 'unsafe-eval' 'unsafe-inline' 'self' data: blob: mediastream: ws: wss: "]

        if self.app_host and self.content_host != self.app_host:
            pieces.append(self.app_host + '/_set_session')

        if self.content_error_redirect:
            pieces.append(' ' + self.content_error_redirect)

        pieces.append("; form-action 'self'")
        return ''.join(pieces)

    def init_routes(self):
        """Register all content-related routes on ``self.app``.

        Covers the client archive and remote browser apis, the 'new
        recording' api and redirects, the cookie api, the proxy endpoints,
        the embed/record/patch/extract/replay content routes and the
        session set/clear endpoints.

        Route order matters: the generic '/<user>/<coll>/<wb_url:path>'
        replay route is registered after the more specific content routes.
        """
        wr_api_spec.set_curr_tag('External Archives')

        @self.app.get('/api/v1/client_archives')
        def get_client_archives():
            return self.client_archives

        wr_api_spec.set_curr_tag('Browsers')

        @self.app.get('/api/v1/create_remote_browser')
        def create_browser():
            """ Api to launch remote browser instances
            """
            sesh = self.get_session()

            if sesh.is_new() and self.is_content_request():
                self._raise_error(403, 'invalid_browser_request')

            browser_id = request.query['browser']

            Stats(self.redis).incr_browser(browser_id)

            user = self.get_user(redir_check=False)

            data = request.query

            coll_name = data.getunicode('coll', '')
            rec = data.get('rec', '')

            mode = data.get('mode', '')

            url = data.getunicode('url', '')
            timestamp = data.get('timestamp', '')

            sources = ''
            inv_sources = ''
            patch_rec = ''

            collection = user.get_collection_by_name(coll_name)

            # the collection must be validated before it is dereferenced,
            # otherwise a missing collection fails with an AttributeError
            # instead of the intended 404
            if not collection:
                return self._raise_error(404, 'no_such_collection')

            recording = collection.get_recording(rec)

            if mode == 'extract':
                # Extract from All, Patch from None
                sources = '*'
                inv_sources = '*'
            elif mode.startswith('extract:'):
                # Extract from One, Patch from all but one
                sources = mode.split(':', 1)[1]
                inv_sources = sources
                # load patch recording also
                #patch_recording = collection.get_recording(recording['patch_rec'])
                if recording:
                    patch_rec = recording.get_prop('patch_rec')

                mode = 'extract'
            elif mode.startswith('extract_only:'):
                # Extract from one only, no patching
                sources = mode.split(':', 1)[1]
                inv_sources = '*'
                mode = 'extract'

            if mode in self.MODIFY_MODES:
                if not recording:
                    return self._raise_error(404, 'no_such_recording')

                #rec = recording.my_id
            elif mode in ('replay', 'replay-coll'):
                rec = '*'
            else:
                return self._raise_error(400, 'invalid_mode')


            browser_can_write = '1' if self.access.can_write_coll(collection) else '0'

            remote_ip = self._get_remote_ip()

            # build kwargs
            kwargs = dict(user=user.name,
                          id=sesh.get_id(),
                          coll=collection.my_id,
                          rec=rec,
                          coll_name=quote(coll_name),
                          #rec_name=quote(rec_name, safe='/*'),

                          type=mode,
                          sources=sources,
                          inv_sources=inv_sources,
                          patch_rec=patch_rec,

                          remote_ip=remote_ip,
                          ip=remote_ip,

                          browser=browser_id,
                          url=url,
                          request_ts=timestamp,

                          browser_can_write=browser_can_write)

            data = self.browser_mgr.request_new_browser(kwargs)

            if 'error_message' in data:
                self._raise_error(400, data['error_message'])

            return data

        # UPDATE REMOTE BROWSER CONFIG
        @self.app.get('/api/v1/update_remote_browser/<reqid>')
        def update_remote_browser(reqid):
            user, collection = self.load_user_coll(api=True)

            timestamp = request.query.getunicode('timestamp')
            type_ = request.query.getunicode('type')

            # if switching mode, need to have write access
            # for timestamp, only read access
            if type_:
                self.access.assert_can_write_coll(collection)
            else:
                self.access.assert_can_read_coll(collection)

            return self.browser_mgr.update_remote_browser(reqid,
                                                          type_=type_,
                                                          timestamp=timestamp)

        # REDIRECTS
        @self.app.route('/record/<wb_url:path>', method='ANY')
        def redir_new_temp_rec(wb_url):
            coll_name = 'temp'
            rec_title = self.DEF_REC_NAME
            wb_url = self.add_query(wb_url)
            return self.do_create_new_and_redir(coll_name, rec_title, wb_url, 'record')

        @self.app.route('/$record/<coll_name>/<rec_title>/<wb_url:path>', method='ANY')
        def redir_new_record(coll_name, rec_title, wb_url):
            wb_url = self.add_query(wb_url)
            return self.do_create_new_and_redir(coll_name, rec_title, wb_url, 'record')

        # API NEW
        wr_api_spec.set_curr_tag('Recordings')

        @self.app.post('/api/v1/new')
        def api_create_new():
            self.redir_host()

            url = request.json.get('url')
            coll = request.json.get('coll')
            mode = request.json.get('mode')

            desc = request.json.get('desc', '')

            browser = request.json.get('browser')
            is_content = request.json.get('is_content') and not browser
            timestamp = request.json.get('timestamp')

            wb_url = self.construct_wburl(url, timestamp, browser, is_content)

            host = self.content_host if is_content else self.app_host
            if not host:
                host = request.urlparts.netloc

            full_url = request.environ['wsgi.url_scheme'] + '://' + host

            url, rec, patch_rec = self.do_create_new(coll, '', wb_url, mode, desc=desc)

            full_url += url

            return {'url': full_url,
                    'user': self.access.session_user.name,
                    'rec_name': rec,
                    'patch_rec_name': patch_rec
                   }

        # COOKIES
        wr_api_spec.set_curr_tag('Cookies')

        @self.app.post('/api/v1/auth/cookie')
        def add_cookie():
            user, collection = self.load_user_coll()

            data = request.json or {}

            rec_name = data.get('rec', '*')
            recording = collection.get_recording(rec_name)

            name = data.get('name')
            value = data.get('value')
            domain = data.get('domain')

            if not domain:
                return self._raise_error(400, 'domain_missing')

            self.add_cookie(user, collection, recording, name, value, domain)

            return {'success': domain}

        # PROXY
        @self.app.route('/_proxy/<url:path>', method='ANY')
        def do_proxy(url):
            return self.do_proxy(url)

        # PROXY with CORS
        @self.app.route('/proxy-fetch/<url:path>', method='GET')
        def do_proxy_fetch_cors(url):
            res = self.do_proxy(url)

            if 'HTTP_ORIGIN' in request.environ:
                self.set_options_headers(None, None, res)

            return res

        @self.app.route('/api/v1/remote/put-record', method='PUT')
        def do_put_record():
            return self.do_put_record()

        # LIVE DEBUG
        # (route registration is disabled, so this handler is never mounted)
        #@self.app.route('/live/<wb_url:path>', method='ANY')
        def live(wb_url):
            request.path_shift(1)

            return self.handle_routing(wb_url, user='******', coll='temp', rec='', type='live')

        # EMBED
        @self.app.route('/_embed/<user>/<coll>/<wb_url:path>', method='ANY')
        def embed_replay(user, coll, wb_url):
            request.path_shift(3)
            #return self.do_replay_coll_or_rec(user, coll, wb_url, is_embed=True)
            return self.handle_routing(wb_url, user, coll, '*', type='replay-coll',
                                       is_embed=True)


        # DISPLAY
        @self.app.route('/_embed_noborder/<user>/<coll>/<wb_url:path>', method='ANY')
        def embed_replay(user, coll, wb_url):
            request.path_shift(3)
            #return self.do_replay_coll_or_rec(user, coll, wb_url, is_embed=True,
            #                                  is_display=True)
            return self.handle_routing(wb_url, user, coll, '*', type='replay-coll',
                                       is_embed=True, is_display=True)


        # CONTENT ROUTES
        # Record
        @self.app.route('/<user>/<coll>/<rec>/record/<wb_url:path>', method='ANY')
        def do_record(user, coll, rec, wb_url):
            request.path_shift(4)

            return self.handle_routing(wb_url, user, coll, rec, type='record', redir_route='record')

        # Patch
        @self.app.route('/<user>/<coll>/<rec>/patch/<wb_url:path>', method='ANY')
        def do_patch(user, coll, rec, wb_url):
            request.path_shift(4)

            return self.handle_routing(wb_url, user, coll, rec, type='patch', redir_route='patch')

        # Extract
        # (raw strings: '\:' is not a valid escape sequence in a normal
        # string literal; the route text itself is unchanged)
        @self.app.route(r'/<user>/<coll>/<rec>/extract\:<archive>/<wb_url:path>', method='ANY')
        def do_extract_patch_archive(user, coll, rec, wb_url, archive):
            request.path_shift(4)

            return self.handle_routing(wb_url, user, coll, rec, type='extract',
                                       sources=archive,
                                       inv_sources=archive,
                                       redir_route='extract:' + archive)

        @self.app.route(r'/<user>/<coll>/<rec>/extract_only\:<archive>/<wb_url:path>', method='ANY')
        def do_extract_only_archive(user, coll, rec, wb_url, archive):
            request.path_shift(4)

            return self.handle_routing(wb_url, user, coll, rec, type='extract',
                                       sources=archive,
                                       inv_sources='*',
                                       redir_route='extract_only:' + archive)

        @self.app.route('/<user>/<coll>/<rec>/extract/<wb_url:path>', method='ANY')
        def do_extract_all(user, coll, rec, wb_url):
            request.path_shift(4)

            return self.handle_routing(wb_url, user, coll, rec, type='extract',
                                       sources='*',
                                       inv_sources='*',
                                       redir_route='extract')

        # REPLAY
        # Replay List
        @self.app.route('/<user>/<coll>/list/<list_id>/<bk_id>/<wb_url:path>', method='ANY')
        def do_replay_rec(user, coll, list_id, bk_id, wb_url):
            request.path_shift(5)

            return self.handle_routing(wb_url, user, coll, '*', type='replay-coll')

        # Replay Recording
        @self.app.route('/<user>/<coll>/<rec>/replay/<wb_url:path>', method='ANY')
        def do_replay_rec(user, coll, rec, wb_url):
            request.path_shift(4)

            return self.handle_routing(wb_url, user, coll, rec, type='replay')

        # Replay Coll
        @self.app.route('/<user>/<coll>/<wb_url:path>', method='ANY')
        def do_replay_coll(user, coll, wb_url):
            request.path_shift(2)
            # BEGIN PERMA CUSTOMIZATION
            # see self.refuse_playback for more information
            if any(url in wb_url for url in self.refuse_playback):
                self._raise_error(404, 'no_such_recording')
            # END PERMA CUSTOMIZATION
            return self.handle_routing(wb_url, user, coll, '*', type='replay-coll')

        # Session redir
        @self.app.get(['/_set_session'])
        def set_sesh():
            sesh = self.get_session()

            if self.is_content_request():
                cookie = request.query.getunicode('cookie')
                sesh.set_id_from_cookie(cookie)
                return self.redirect(request.query.getunicode('path'))

            else:
                url = request.environ['wsgi.url_scheme'] + '://' + self.content_host
                self.set_options_headers(self.content_host, self.app_host)
                response.headers['Cache-Control'] = 'no-cache'

                cookie = request.query.getunicode('webrec.sesh_cookie')

                # otherwise, check if content cookie provided
                # already have same session, just redirect back
                # likely a real 404 not found
                if sesh.is_same_session(request.query.getunicode('content_cookie')):
                    redirect(url + request.query.getunicode('path'))

                # if anon, ensure session is persisted before setting content session
                # generate cookie to pass
                if not cookie:
                    self.access.init_session_user(persist=True)
                    cookie = sesh.get_cookie()

                cookie = quote(cookie)
                url += '/_set_session?{0}&cookie={1}'.format(request.environ['QUERY_STRING'], cookie)
                redirect(url)

        # OPTIONS
        @self.app.route('/_set_session', method='OPTIONS')
        def set_sesh_options():
            self.set_options_headers(self.content_host, self.app_host)
            return ''

        @self.app.route('/_clear_session', method='OPTIONS')
        def set_clear_options():
            self.set_options_headers(self.app_host, self.content_host)
            return ''

        # CLEAR CONTENT SESSION
        @self.app.get(['/_clear_session'])
        def clear_sesh():
            self.set_options_headers(self.app_host, self.content_host)
            response.headers['Cache-Control'] = 'no-cache'

            if not self.is_content_request():
                self._raise_error(400, 'invalid_request')

            try:
                # delete session (will update cookie)
                self.get_session().delete()
                return {'success': 'logged_out'}

            except Exception:
                self._raise_error(400, 'invalid_request')

    def do_proxy(self, url):
        """Serve ``url`` for a request coming from a remote browser session.

        The session info from ``self.browser_mgr`` doubles as the template
        params. Any exception raised while rendering is printed and turned
        into a rendered 'content_error.html' page carrying the exception's
        status code (500 by default) and body/message.
        """
        info = self.browser_mgr.init_remote_browser_session()
        if not info:
            return self._raise_error(400, 'invalid_connection_source')

        try:
            kwargs = info
            user = info['the_user']
            collection = info['collection']
            recording = info['recording']

            if kwargs['type'] == 'replay-coll':
                collection.sync_coll_index(exists=False, do_async=False)

            url = self.add_query(url)
            kwargs['url'] = url

            wb_url = kwargs.get('request_ts', '') + 'bn_/' + url

            environ = request.environ
            environ['webrec.template_params'] = kwargs
            environ['pywb.static_prefix'] = self.BUNDLE_PREFIX

            remote_ip = info.get('remote_ip')

            # only modifying requests are rate limited
            if remote_ip and info['type'] in self.MODIFY_MODES:
                kwargs['ip'] = self.check_rate_limit(user, remote_ip)

            rendered = self.render_content(wb_url, kwargs, request.environ)

            return HTTPResponse(body=rendered.body,
                                status=rendered.status_headers.statusline,
                                headers=rendered.status_headers.headers)

        except Exception as e:
            import traceback
            traceback.print_exc()

            status_code = getattr(e, 'status_code', 500)

            # prefer the exception's body, then its msg, else no body at all
            for attr in ('body', 'msg'):
                if hasattr(e, attr):
                    err_body = getattr(e, attr)
                    break
            else:
                err_body = ''

            @self.jinja2_view('content_error.html')
            def handle_error(status_code, err_body, environ):
                response.status = status_code
                kwargs['url'] = url
                kwargs['status'] = status_code
                kwargs['err_body'] = err_body
                kwargs['host_prefix'] = self.get_host_prefix(environ)
                kwargs['proxy_magic'] = environ.get('wsgiprox.proxy_host', '')
                return kwargs

            return handle_error(status_code, err_body, request.environ)

    def check_remote_archive(self, wb_url, mode, wb_url_obj=None):
        """Check whether ``wb_url`` points into a known remote archive.

        Returns ``None`` when ``self.wam_loader`` finds no archive for the
        url. Otherwise returns a ``(mode, url)`` tuple where mode is
        'extract:<archive id>' and url is the archive-relative url with the
        original modifier re-applied.
        """
        if not wb_url_obj:
            wb_url_obj = WbUrl(wb_url)

        found = self.wam_loader.find_archive_for_url(wb_url_obj.url)
        if not found:
            return None

        _pk, archive_url, archive_id = found

        extract_mode = 'extract:' + archive_id
        rewritten = WbUrl(archive_url).to_str(mod=wb_url_obj.mod)

        return extract_mode, rewritten

    def do_put_record(self):
        """Forward a PUT record request from a remote browser upstream.

        The session is resolved from the 'reqid' query param; the request
        body is sent to the upstream url built for type 'put_record' and the
        'target_uri' query param. Returns the upstream json reply, or an
        'error_message' dict when the reply cannot be parsed or does not
        report success.
        """
        reqid = request.query.getunicode('reqid')
        info = self.browser_mgr.init_remote_browser_session(reqid=reqid)
        if not info:
            return self._raise_error(400, 'invalid_connection_source')

        user = info['the_user']
        collection = info['collection']
        recording = info['recording']

        kwargs = {'user': user.name,
                  'coll': collection.my_id,
                  'rec': recording.my_id,
                  'type': 'put_record'}

        target_uri = request.query.getunicode('target_uri')
        upstream_url = self.get_upstream_url('', kwargs, {'url': target_uri})

        content_type = request.environ.get('CONTENT_TYPE', 'text/plain')

        upstream_resp = requests.put(upstream_url,
                                     data=request.body,
                                     headers={'Content-Type': content_type})

        try:
            res = upstream_resp.json()
            # success is reported as the string 'true'
            if res['success'] != 'true':
                print(res)
                return {'error_message': 'put_record_failed'}

        except Exception as e:
            print(e)
            return {'error_message': 'put_record_failed'}

        return res

    def do_create_new_and_redir(self, coll_name, rec_name, wb_url, mode):
        """Create a new recording via ``do_create_new`` and redirect to its url."""
        target, _rec_id, _patch_id = self.do_create_new(coll_name, rec_name,
                                                        wb_url, mode)
        return self.redirect(target)

    def do_create_new(self, coll_name, rec_title, wb_url, mode, desc=''):
        """Create a recording (plus patch recording for extracts) for a url.

        For 'record' requests the url is first checked against the known
        remote archives, which may switch the mode to 'extract:<id>' and
        rewrite the url. Anonymous users always use the 'temp' collection;
        other users get the collection named by the sanitized ``coll_name``,
        created on demand.

        Returns ``(new_url, recording_id, patch_recording_id)``; the patch
        id is an empty string unless the mode starts with 'extract:'.
        Returns ``None`` after flashing a message and redirecting to '/'
        when the user is anonymous and anonymous recording is disabled.
        """
        if mode == 'record':
            remote = self.check_remote_archive(wb_url, mode)
            if remote:
                mode, wb_url = remote

        user = self.access.init_session_user()

        if not user.is_anon():
            coll_title = coll_name
            coll_name = self.sanitize_title(coll_title)

        elif self.anon_disabled:
            self.flash_message('Sorry, anonymous recording is not available.')
            self.redirect('/')
            return

        else:
            coll_name = 'temp'
            coll_title = 'Temporary Collection'

        collection = user.get_collection_by_name(coll_name)
        if not collection:
            collection = user.create_collection(coll_name, title=coll_title)

        recording = self._create_new_rec(collection, rec_title, mode, desc=desc)

        patch_rec_name = ''
        if mode.startswith('extract:'):
            # extracting from a single archive also gets a patch recording
            patch_title = self.patch_of_name(recording['title'])
            patch_recording = self._create_new_rec(collection, patch_title, 'patch')

            recording.set_patch_recording(patch_recording)

            patch_rec_name = patch_recording.my_id

        new_url = '/{0}/{1}/{2}/{3}/{4}'.format(user.my_id,
                                                collection.name,
                                                recording.name,
                                                mode,
                                                wb_url)
        return new_url, recording.my_id, patch_rec_name

    def redir_set_session(self):
        """Redirect to '/_set_session' on the session redirect host.

        The current request path (script name + path info, with the query
        added by ``self.add_query``) is passed along url-quoted in the
        'path' query param.
        """
        environ = request.environ
        current_path = self.add_query(environ['SCRIPT_NAME'] + environ['PATH_INFO'])

        target = '/_set_session?path=' + quote(current_path)
        self.redir_host(self.session_redirect_host, target)

    def _create_new_rec(self, collection, title, mode, desc=''):
        #rec_name = self.sanitize_title(title) if title else ''
        rec_type = 'patch' if mode == 'patch' else None
        return collection.create_recording(title=title,
                                           desc=desc,
                                           rec_type=rec_type)

    def patch_of_name(self, name):
        """Return the title used for the patch recording of ``name``."""
        return ''.join(('Patch of ', name))

    def handle_routing(self, wb_url, user, coll_name, rec_name, type,
                       is_embed=False,
                       is_display=False,
                       sources='',
                       inv_sources='',
                       redir_route=None):
        """Validate a content request and render (or redirect) it.

        Looks up the user, collection and recording named in the route,
        applies the checks required by the request mode ``type``, builds the
        kwargs passed to ``render_content`` and converts the result into a
        bottle ``HTTPResponse``.

        :param wb_url: url portion of the route; replaced by ``_full_url(wb_url)``
        :param user: user name from the route; ``'_new'`` combined with
            ``redir_route`` delegates to ``do_create_new_and_redir``
        :param coll_name: collection name from the route
        :param rec_name: recording name from the route
        :param type: request mode, e.g. ``'record'``, ``'replay'``,
            ``'replay-coll'`` or any entry of ``self.MODIFY_MODES``
        :param is_embed: passed through to the render kwargs
        :param is_display: passed through to the render kwargs
        :param sources: passed through to the render kwargs
        :param inv_sources: passed through; when set and not ``'*'`` in a
            modify mode, the recording's patch recording is looked up
        :param redir_route: route used when creating a new recording
        :returns: an ``HTTPResponse`` with the rendered content, the result of
            a redirect, or -- on ``UpstreamException`` -- either a 307 redirect
            to ``self.content_error_redirect`` or the rendered
            ``content_error.html`` view

        NOTE(review): ``_raise_error``, ``redirect`` and ``redir_set_session``
        are presumably non-returning (they look like they raise bottle
        responses); the checks below rely on that -- confirm.
        """

        wb_url = self._full_url(wb_url)
        if user == '_new' and redir_route:
            return self.do_create_new_and_redir(coll_name, rec_name, wb_url, redir_route)

        sesh = self.get_session()

        # filled in only by the mode-specific branches below
        remote_ip = None
        frontend_cache_header = None
        patch_recording = None

        the_user, collection, recording = self.user_manager.get_user_coll_rec(user, coll_name, rec_name)

        if not the_user:
            msg = 'not_found' if user == 'api' else 'no_such_user'
            self._raise_error(404, msg)

        # ids forwarded to render_content; None when the object was not found
        coll = collection.my_id if collection else None
        rec = recording.my_id if recording else None

        # --- modes listed in MODIFY_MODES: need an open recording and write access
        if type in self.MODIFY_MODES:
            if sesh.is_new() and self.is_content_request():
                self.redir_set_session()

            if not recording:
                # redirect to the sanitized recording name if it differs from the requested one
                self._redir_if_sanitized(self.sanitize_title(rec_name),
                                         rec_name,
                                         wb_url)

                # don't auto create recording for inner frame w/o accessing outer frame
                self._raise_error(404, 'no_such_recording')

            elif not recording.is_open():
                # force creation of new recording as this one is closed
                self._raise_error(400, 'recording_not_open')

            collection.access.assert_can_write_coll(collection)

            if the_user.is_out_of_space():
                self._raise_error(402, 'out_of_space')

            remote_ip = self._get_remote_ip()

            # check_rate_limit returns '' when no rate limit applies,
            # otherwise the ip it was given
            remote_ip = self.check_rate_limit(the_user, remote_ip)

            if inv_sources and inv_sources != '*':
                #patch_rec_name = self.patch_of_name(rec, True)
                patch_recording = recording.get_patch_recording()
                #patch_recording = collection.get_recording_by_name(patch_rec_name)

        # --- replay modes: need a readable collection (and a recording for 'replay')
        if type in ('replay-coll', 'replay'):
            if not collection:
                # redirect to the sanitized collection name if it differs from the requested one
                self._redir_if_sanitized(self.sanitize_title(coll_name),
                                         coll_name,
                                         wb_url)

                # a new session on a content request is first sent to set its session
                if sesh.is_new() and self.is_content_request():
                    self.redir_set_session()
                else:
                    self._raise_error(404, 'no_such_collection')

            access = self.access.check_read_access_public(collection)

            if not access:
                # unreadable collections are reported as missing (404)
                if sesh.is_new() and self.is_content_request():
                    self.redir_set_session()
                else:
                    self._raise_error(404, 'no_such_collection')

            # anything other than 'public' access is marked as privately cacheable only
            if access != 'public':
                frontend_cache_header = ('Cache-Control', 'private')

            if type == 'replay':
                if not recording:
                    self._raise_error(404, 'no_such_recording')

        # percent-encode SCRIPT_NAME in place, leaving '/' and ':' untouched
        request.environ['SCRIPT_NAME'] = quote(request.environ['SCRIPT_NAME'], safe='/:')

        wb_url = self._context_massage(wb_url)

        wb_url_obj = WbUrl(wb_url)

        # top frame: the frame modifier, or a '$br:' (browser) modifier
        is_top_frame = (wb_url_obj.mod == self.frame_mod or wb_url_obj.mod.startswith('$br:'))

        if type == 'record' and is_top_frame:
            # a truthy result supplies a replacement (mode, wb_url) to redirect to
            result = self.check_remote_archive(wb_url, type, wb_url_obj)
            if result:
                mode, wb_url = result
                new_url = '/{user}/{coll}/{rec}/{mode}/{url}'.format(user=user,
                                                                     coll=coll_name,
                                                                     rec=rec_name,
                                                                     mode=mode,
                                                                     url=wb_url)
                return self.redirect(new_url)

        elif type == 'replay-coll' and not is_top_frame:
            # NOTE(review): presumably ensures the collection index is synced
            # synchronously before replay -- confirm against sync_coll_index
            collection.sync_coll_index(exists=False, do_async=False)

        # parameters handed to render_content (and to the browser manager below)
        kwargs = dict(user=user,
                      id=sesh.get_id(),
                      coll=coll,
                      rec=rec,
                      coll_name=quote(coll_name),
                      rec_name=quote(rec_name, safe='/*'),

                      the_user=the_user,
                      collection=collection,
                      recording=recording,
                      patch_recording=patch_recording,

                      type=type,
                      sources=sources,
                      inv_sources=inv_sources,
                      patch_rec=patch_recording.my_id if patch_recording else None,
                      ip=remote_ip,
                      is_embed=is_embed,
                      is_display=is_display)

        # top-frame replay but through a proxy, redirect to original
        if is_top_frame and 'wsgiprox.proxy_host' in request.environ:
            kwargs['url'] = wb_url_obj.url
            kwargs['request_ts'] = wb_url_obj.timestamp
            self.browser_mgr.update_local_browser(kwargs)
            return redirect(wb_url_obj.url)

        try:
            # may redirect between the app host and the content host
            self.check_if_content(wb_url_obj, request.environ, is_top_frame)

            request.environ['pywb.static_prefix'] = self.BUNDLE_PREFIX

            resp = self.render_content(wb_url, kwargs, request.environ)

            if frontend_cache_header:
                resp.status_headers.headers.append(frontend_cache_header)

            # re-wrap the rendered response as a bottle HTTPResponse
            resp = HTTPResponse(body=resp.body,
                                status=resp.status_headers.statusline,
                                headers=resp.status_headers.headers)

            return resp

        except UpstreamException as ue:
            # details exposed either as query params of the error redirect
            # or as the context of the content_error.html template
            err_context = {
                'url': ue.url,
                'status': ue.status_code,
                'error': ue.msg.get('error'),
                'timestamp': wb_url_obj.timestamp if wb_url_obj else '',
                'user': user,
                'coll': coll_name,
                'rec': rec_name,
                'type': type,
                'app_host': self.app_host,
            }

            @self.jinja2_view('content_error.html')
            def handle_error(error):
                # mirror the upstream status code on the rendered error page
                response.status = ue.status_code
                return error

            if self.content_error_redirect:
                return redirect(self.content_error_redirect + '?' + urlencode(err_context), code=307)
            else:
                return handle_error(err_context)

    def check_if_content(self, wb_url, environ, is_top_frame):
        """Redirect replay requests that arrived on the wrong host.

        Nothing happens unless ``wb_url`` is a replay url and a separate
        ``content_host`` is configured. Otherwise a top-frame request made to
        the content host is redirected to ``app_host``, and a non-top-frame
        request made elsewhere is redirected to ``content_host``.

        :param wb_url: object exposing ``is_replay()``
        :param environ: accepted but not used here
        :param is_top_frame: whether the request is for the top frame
        """
        if not wb_url.is_replay() or not self.content_host:
            return

        on_content_host = self.is_content_request()

        if is_top_frame and on_content_host:
            self.redir_host(self.app_host)
        elif not is_top_frame and not on_content_host:
            self.redir_host(self.content_host)

    def _filter_headers(self, type, status_headers):
        if type in ('replay', 'replay-coll'):
            new_headers = []
            for name, value in status_headers.headers:
                if name.lower() != 'set-cookie':
                    new_headers.append((name, value))

            status_headers.headers = new_headers

    def _inject_nocache_headers(self, status_headers, kwargs):
        if 'browser_id' in kwargs:
            status_headers.headers.append(
                ('Cache-Control', 'no-cache, no-store, max-age=0, must-revalidate')
            )

    def _redir_if_sanitized(self, id, title, wb_url):
        """Redirect to the sanitized form of the current path when it differs.

        :param id: sanitized name
        :param title: name as it appeared in the request
        :param wb_url: url appended after the rewritten script name

        When ``id`` equals ``title`` nothing happens. Otherwise ``title`` is
        replaced by ``id`` in the request's script name and the client is
        redirected to that path followed by ``wb_url``.
        """
        was_sanitized = id != title
        if not was_sanitized:
            return

        sanitized_prefix = request.script_name.replace(title, id)
        self.redirect(sanitized_prefix + wb_url)

    def _context_massage(self, wb_url):
        """Adjust the WSGI environ and ``wb_url`` before content is rendered.

        * restores ``HTTP_COOKIE`` from ``webrec.request_cookie`` when present
        * removes the ``HTTP_X_PUSH_STATE_REQUEST`` entry if it is set
        * strips ``&spf=navigate`` from YouTube ``mp_`` urls that end with it

        :param wb_url: url string, including any modifier prefix
        :returns: the (possibly adjusted) url string
        """
        environ = request.environ

        # reset HTTP_COOKIE to guarded request_cookie for LiveRewriter
        if 'webrec.request_cookie' in environ:
            environ['HTTP_COOKIE'] = environ['webrec.request_cookie']

        # Drop the header only if it is there; pop() with a default replaces
        # the former bare "except: pass", which hid every kind of error.
        environ.pop('HTTP_X_PUSH_STATE_REQUEST', None)

        #TODO: generalize
        if wb_url.endswith('&spf=navigate') and wb_url.startswith('mp_/https://www.youtube.com'):
            wb_url = wb_url.replace('&spf=navigate', '')

        return wb_url

    def add_query(self, url):
        """Return ``url`` with the current request's query string appended, if any."""
        query = request.query_string
        if not query:
            return url

        return url + ('?' + query)

    def _full_url(self, url=''):
        """Return the requested url exactly as the client sent it.

        When the server supplies ``REQUEST_URI`` and it starts with
        ``SCRIPT_NAME + '/'``, everything after that prefix is returned
        verbatim. Otherwise the given ``url`` (or, when empty,
        SCRIPT_NAME + PATH_INFO) is used and the query string is re-attached
        via ``add_query``.

        :param url: url from the route; may be empty
        :returns: the url string including any query string
        """
        environ = request.environ

        request_uri = environ.get('REQUEST_URI')
        # always non-empty thanks to the trailing slash
        script_name = environ.get('SCRIPT_NAME', '') + '/'

        if request_uri and request_uri.startswith(script_name):
            return request_uri[len(script_name):]

        if not url:
            # Fixed: this used to read ``environ.request.environ[...]``, which
            # raised NameError because no ``environ`` name existed here.
            url = environ.get('SCRIPT_NAME', '') + environ.get('PATH_INFO', '')

        return self.add_query(url)

    def get_cookie_key(self, kwargs):
        """Return the cookie key for the user/collection/recording in ``kwargs``.

        The key is produced by ``self.dyn_stats.get_cookie_key`` and is scoped
        to the current session id. ``kwargs`` must contain ``the_user``,
        ``collection`` and ``recording``.
        """
        session_id = self.get_session().get_id()
        owner = kwargs['the_user']
        collection = kwargs['collection']
        recording = kwargs['recording']
        return self.dyn_stats.get_cookie_key(owner, collection, recording,
                                             sesh_id=session_id)

    def add_cookie(self, user, collection, recording, name, value, domain):
        """Store a cookie with the cookie tracker under the session-scoped key.

        The key comes from ``self.dyn_stats.get_cookie_key`` for the given
        user, collection and recording plus the current session id; the
        cookie is then stored as ``(key, domain, name, value)``.
        """
        session_id = self.get_session().get_id()
        cookie_key = self.dyn_stats.get_cookie_key(user, collection, recording,
                                                   sesh_id=session_id)
        self.cookie_tracker.add_cookie(cookie_key, domain, name, value)

    def _get_remote_ip(self):
        """Return the client address with its last dot-separated part removed.

        ``HTTP_X_REAL_IP`` is preferred; ``REMOTE_ADDR`` (default ``''``) is
        used when it is missing or empty. An address without a dot is
        returned unchanged.
        """
        environ = request.environ
        address = environ.get('HTTP_X_REAL_IP') or environ.get('REMOTE_ADDR', '')
        return address.rsplit('.', 1)[0]

    def check_rate_limit(self, user, remote_ip):
        """Check the user's rate limit and return the ip to use for counting.

        :param user: object providing ``is_rate_limited(remote_ip)``
        :param remote_ip: ip string used for the check
        :returns: ``''`` when no rate limit applies at all (so this request
            does not increment the rate counter), otherwise ``remote_ip``

        A result equal to ``True`` reports a 402 ``rate_limit_exceeded`` error
        through ``_raise_error``.
        """
        verdict = user.is_rate_limited(remote_ip)

        # equality (not identity) comparisons are kept on purpose so that the
        # accepted result values stay exactly as before
        if verdict == True:  # noqa: E712
            self._raise_error(402, 'rate_limit_exceeded')
            return

        # None means there is no rate limit at all
        if verdict == None:  # noqa: E711
            return ''

        return remote_ip

    ## RewriterApp overrides
    def get_base_url(self, wb_url, kwargs):
        """Return the upstream url for this request (RewriterApp override).

        The url template registered in ``self.paths`` for ``kwargs['type']``
        is formatted with the record, replay and live hosts plus every entry
        of ``kwargs``.

        Note: using a caller-provided ``upstream_url`` from ``kwargs`` (proxy
        mode) is currently disabled; the template is always used.
        """
        template = self.paths[kwargs['type']]

        return template.format(record_host=self.record_host,
                               replay_host=self.replay_host,
                               live_host=self.live_host,
                               **kwargs)

    def process_query_cdx(self, cdx, wb_url, kwargs):
        """Set ``cdx['rec']`` for a cdx query result.

        ``kwargs['rec']`` is used when it names a specific recording. When it
        is missing, empty or ``'*'``, the part of ``cdx['source']`` before the
        first ``':'`` is used instead.
        """
        rec = kwargs.get('rec')
        needs_source = not rec or rec == '*'

        cdx['rec'] = cdx['source'].partition(':')[0] if needs_source else rec

    def get_host_prefix(self, environ):
        """Return the scheme + host prefix for this request (RewriterApp override).

        When a content host is configured and the request did not come through
        the proxy (no ``wsgiprox.proxy_host`` in ``environ``), the prefix is
        built from the request scheme and ``self.content_host``; otherwise the
        parent implementation decides.
        """
        use_content_host = self.content_host and 'wsgiprox.proxy_host' not in environ
        if not use_content_host:
            return super(ContentController, self).get_host_prefix(environ)

        return '://'.join((environ['wsgi.url_scheme'], self.content_host))

    def get_top_url(self, full_prefix, wb_url, cdx, kwargs):
        """Return the top-frame url (RewriterApp override).

        For a url whose modifier is not the frame modifier, and when the
        content host differs from the app host, the content host inside
        ``full_prefix`` is swapped for the app host before delegating to the
        parent implementation.
        """
        not_frame_mod = wb_url.mod != self.frame_mod
        if not_frame_mod and self.content_host != self.app_host:
            full_prefix = full_prefix.replace(self.content_host, self.app_host)

        parent = super(ContentController, self)
        return parent.get_top_url(full_prefix, wb_url, cdx, kwargs)

    def get_top_frame_params(self, wb_url, kwargs):
        """Build the parameter dict for the top frame.

        ``top_prefix`` is the parent class's host prefix followed by the
        relative prefix of the current request. For ``type == 'live'`` only
        the mode, embed/display flags and ``top_prefix`` are returned; for
        every other mode the dict also carries the content inject info, the
        user / collection / recording identifiers and the source filters.

        :param wb_url: not used here
        :param kwargs: routing kwargs; must contain ``type`` and, for
            non-live modes, ``the_user``, ``collection``, ``recording``,
            ``user``, ``coll``, ``coll_name``, ``rec`` and ``rec_name``
        """
        mode = kwargs['type']

        environ = request.environ
        top_prefix = (super(ContentController, self).get_host_prefix(environ) +
                      self.get_rel_prefix(environ))

        if mode == 'live':
            return dict(curr_mode=mode,
                        is_embed=kwargs.get('is_embed'),
                        is_display=kwargs.get('is_display'),
                        top_prefix=top_prefix)

        # Refreshing the session cookie expiration here stays disabled until
        # it can be guaranteed that the cookie is not changed:
        #self.get_session().update_expires()

        info = self.get_content_inject_info(kwargs['the_user'],
                                            kwargs['collection'],
                                            kwargs['recording'])

        return dict(info=info,
                    curr_mode=mode,
                    user=kwargs['user'],
                    coll=kwargs['coll'],
                    coll_name=kwargs['coll_name'],
                    coll_title=info.get('coll_title', ''),
                    rec=kwargs['rec'],
                    rec_name=kwargs['rec_name'],
                    rec_title=info.get('rec_title', ''),
                    is_embed=kwargs.get('is_embed'),
                    is_display=kwargs.get('is_display'),
                    top_prefix=top_prefix,
                    sources=kwargs.get('sources'),
                    inv_sources=kwargs.get('inv_sources'))

    def _add_custom_params(self, cdx, resp_headers, kwargs, record):
        try:
            self._add_stats(cdx, resp_headers, kwargs, record)
        except:
            import traceback
            traceback.print_exc()

    def _add_stats(self, cdx, resp_headers, kwargs, record):
        """Record replay-size and per-source stats for one served record.

        * For ``replay-coll`` the record's ``Content-Length`` (when present)
          is added to the replay stats of ``kwargs['user']``.
        * ``record`` and ``live`` requests, results without a ``source``, and
          local replay results served in ``patch`` mode produce no dyn stats.
        * Otherwise ``dyn_stats.update_dyn_stats`` is called with the url,
          the referrer, the effective source and -- for modes in
          ``MODIFY_MODES`` -- the recording the response was written to.
        """
        mode = kwargs['type']

        if mode == 'replay-coll':
            content_len = record.rec_headers.get_header('Content-Length')
            if content_len is not None:
                Stats(self.redis).incr_replay(int(content_len), kwargs['user'])

        if mode == 'record' or mode == 'live':
            return

        source = cdx.get('source')
        if not source:
            return

        # 'local' results are counted as 'replay'
        if source == 'local':
            source = 'replay'

        if mode == 'patch' and source == 'replay':
            return

        # an original source id, when given, takes precedence
        source = cdx.get('orig_source_id') or source

        # set source in recording-key: find the recording that was written to
        target_recording = None

        if (mode in self.MODIFY_MODES and
                not resp_headers.get('Recorder-Skip') and
                source not in ('live', 'replay')):

            written_rec = unquote(resp_headers.get('Recorder-Rec', '')) or kwargs['rec']

            for candidate in (kwargs.get('recording'), kwargs.get('patch_recording')):
                if candidate and written_rec == candidate.my_id:
                    target_recording = candidate
                    break

        url = cdx.get('url')

        environ = request.environ
        referrer = environ.get('HTTP_REFERER')

        # fall back to the url itself when there is no referrer, or when a
        # non-proxy request is referred from this very host
        if not referrer or ('wsgiprox.proxy_host' not in environ and
                            environ.get('HTTP_HOST') in referrer):
            referrer = url

        self.dyn_stats.update_dyn_stats(url, kwargs, referrer, source, target_recording)

    def handle_custom_response(self, environ, wb_url, full_prefix, host_prefix, kwargs):
        """Serve containerized-browser requests, else defer to RewriterApp.

        A url whose modifier starts with ``$br:`` specifies a containerized
        browser and is handled by ``handle_browser_embed``; everything else
        goes to ``RewriterApp.handle_custom_response``.
        """
        wants_browser = wb_url.mod.startswith('$br:')
        if not wants_browser:
            return RewriterApp.handle_custom_response(self, environ, wb_url,
                                                      full_prefix, host_prefix, kwargs)

        return self.handle_browser_embed(wb_url, kwargs)

    def handle_browser_embed(self, wb_url, kwargs):
        """Request a containerized browser and render the embed page.

        The browser id is the part of the url modifier after the first
        ``':'``. ``kwargs`` is extended in place with the write flag, remote
        ip, url, timestamp and browser id before being passed to the browser
        manager. An ``error_message`` in the manager's reply is reported as a
        400 error; otherwise the reply, merged with the top-frame params and
        the ``wb_url``, is rendered with ``browser_embed.html``.
        """
        # handle cbrowsers
        browser_id = wb_url.mod.split(':', 1)[1]

        can_write = self.access.can_write_coll(kwargs['collection'])
        kwargs['browser_can_write'] = '1' if can_write else '0'
        kwargs['remote_ip'] = self._get_remote_ip()
        kwargs['url'] = wb_url.url
        kwargs['timestamp'] = wb_url.timestamp
        kwargs['browser'] = browser_id

        # container redis info
        browser_info = self.browser_mgr.request_new_browser(kwargs)
        if 'error_message' in browser_info:
            self._raise_error(400, browser_info['error_message'])

        top_frame_params = self.get_top_frame_params(wb_url, kwargs)
        browser_info.update(top_frame_params)
        browser_info['wb_url'] = wb_url

        @self.jinja2_view('browser_embed.html')
        def browser_embed(data):
            return data

        return browser_embed(browser_info)

    def get_content_inject_info(self, user, collection, recording):
        """Collect the info dict for the given user, collection and recording.

        :param user: object providing ``get_size_remaining()``
        :param collection: object providing ``name``, ``size`` and ``get_prop``
        :param recording: recording object or a falsy value
        :returns: dict with ``size`` (of the recording when given, else of
            the collection), ``rec_id`` (only with a recording), ``coll_id``,
            URL-quoted ``coll_title`` and ``coll_desc``, and
            ``size_remaining``

        Note: ``rec_title`` is not populated here.
        """
        # recording
        if recording:
            info = {'rec_id': recording.my_id, 'size': recording.size}
        else:
            info = {'size': collection.size}

        # collection
        info['coll_id'] = collection.name

        title = collection.get_prop('title', collection.name)
        info['coll_title'] = quote(title, safe='/ ')

        description = collection.get_prop('desc', '')
        info['coll_desc'] = quote(description)

        info['size_remaining'] = user.get_size_remaining()

        return info

    def construct_wburl(self, url, ts, browser, is_content):
        """Build a wayback-style url: ``<ts><modifier>/<url>``.

        :param url: target url
        :param ts: timestamp string, or a falsy value for none
        :param browser: browser id; when set the modifier is ``$br:<browser>``
        :param is_content: when true (and no browser) the modifier is ``mp_``
        :returns: ``url`` alone when there is neither timestamp nor modifier,
            otherwise the prefix, a slash and the url
        """
        if browser:
            modifier = '$br:' + browser
        elif is_content:
            modifier = 'mp_'
        else:
            modifier = ''

        prefix = (ts or '') + modifier

        return prefix + '/' + url if prefix else url