示例#1
0
    def init_recorder(self, recorder_config):
        """Set up pywb recording from the ``recorder`` config entry.

        A falsy config disables recording: ``self.recorder`` and
        ``self.recorder_path`` are set to None and nothing else happens.
        A string config is used as the source collection name with every other
        option left at its default; any other config is indexed with
        ``'source_coll'`` to obtain the collection name.
        """
        if not recorder_config:
            self.recorder = self.recorder_path = None
            return

        if isinstance(recorder_config, str):
            recorder_coll, recorder_config = recorder_config, {}
        else:
            recorder_coll = recorder_config['source_coll']

        option = recorder_config.get

        # TODO: support dedup
        dedup_index = None

        archive_paths = self.warcserver.archive_paths
        rollover_size = int(option('rollover_size', 1000000000))
        rollover_idle_secs = int(option('rollover_idle_secs', 600))
        warc_writer = MultiFileWARCWriter(
            archive_paths,
            max_size=rollover_size,
            max_idle_secs=rollover_idle_secs,
            filename_template=option('filename_template'),
            dedup_index=dedup_index)

        # The recorder forwards to the internal warcserver, addressed by port.
        upstream = self.RECORD_SERVER % str(self.warcserver_server.port)
        self.recorder = RecorderApp(upstream,
                                    warc_writer,
                                    accept_colls=option('source_filter'))

        recorder_server = GeventServer(self.recorder, port=0)

        self.recorder_path = self.RECORD_API % (recorder_server.port,
                                                recorder_coll)
示例#2
0
    def init_recorder(self, recorder_config):
        """Create the recorder app and its server, or disable recording.

        With a falsy ``recorder_config`` both ``self.recorder`` and
        ``self.recorder_path`` are set to None. A string config names the
        source collection directly; otherwise the collection is read from the
        config's ``'source_coll'`` entry.
        """
        if not recorder_config:
            self.recorder = self.recorder_path = None
            return

        is_plain_name = isinstance(recorder_config, str)
        if is_plain_name:
            recorder_coll, recorder_config = recorder_config, {}
        else:
            recorder_coll = recorder_config['source_coll']

        # TODO: support dedup
        dedup_index = None

        archive_paths = self.warcserver.archive_paths
        max_size = int(recorder_config.get('rollover_size', 1000000000))
        max_idle_secs = int(recorder_config.get('rollover_idle_secs', 600))
        filename_template = recorder_config.get('filename_template')
        warc_writer = MultiFileWARCWriter(archive_paths,
                                          max_size=max_size,
                                          max_idle_secs=max_idle_secs,
                                          filename_template=filename_template,
                                          dedup_index=dedup_index)

        upstream = self.RECORD_SERVER % str(self.warcserver_server.port)
        self.recorder = RecorderApp(upstream, warc_writer)

        recorder_server = GeventServer(self.recorder, port=0)

        path_args = (recorder_server.port, recorder_coll)
        self.recorder_path = self.RECORD_API % path_args
示例#3
0
    def setup_class(cls,
                    coll='pywb',
                    config_file='config_test.yaml',
                    recording=False,
                    proxy_opts=None,
                    config_opts=None):
        """Start a FrontEndApp in proxy mode behind a GeventServer for tests.

        :param str coll: value stored under ``'coll'`` in the proxy options
        :param str config_file: config file name, resolved relative to the
            directory containing this test module
        :param bool recording: value stored under ``'recording'`` in the
            proxy options
        :param dict|None proxy_opts: extra proxy options merged over the
            defaults built here
        :param dict|None config_opts: extra top-level config entries; the
            proxy options are added to a copy of it under ``'proxy'``
        """
        super(BaseTestProxy, cls).setup_class()
        config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   config_file)

        cls.root_ca_file = os.path.join(cls.root_dir, 'pywb-ca-test.pem')

        opts = {
            'ca_name': 'pywb test HTTPS Proxy CA',
            'ca_file_cache': cls.root_ca_file,
            'coll': coll,
            'recording': recording,
        }

        if proxy_opts:
            opts.update(proxy_opts)

        # Work on a copy: writing 'proxy' into the argument itself would leak
        # this class's proxy settings into the caller's dict (and, with a
        # mutable default, into every later call that omits config_opts).
        custom_config = dict(config_opts) if config_opts else {}
        custom_config['proxy'] = opts

        cls.app = FrontEndApp(config_file=config_file,
                              custom_config=custom_config)

        cls.server = GeventServer(cls.app, handler_class=RequestURIWSGIHandler)
        cls.proxies = cls.proxy_dict(cls.server.port)
示例#4
0
文件: cli.py 项目: peterk/pywb
 def run_gevent(self):
     """Serve ``self.application`` with a GeventServer on the configured port and bind address."""
     from pywb.utils.geventserver import GeventServer, RequestURIWSGIHandler

     logging.info('Starting Gevent Server on ' + str(self.r.port))

     # NOTE(review): direct=True presumably runs the server in the calling
     # context rather than in the background -- confirm in GeventServer.
     server = GeventServer(
         self.application,
         port=self.r.port,
         hostname=self.r.bind,
         handler_class=RequestURIWSGIHandler,
         direct=True,
     )
示例#5
0
 def run_gevent(self):
     """Create the gevent server that runs ``self.application``.

     The port and bind hostname are taken from ``self.r``.
     """
     from pywb.utils.geventserver import GeventServer, RequestURIWSGIHandler

     start_msg = 'Starting Gevent Server on ' + str(self.r.port)
     logging.info(start_msg)

     gevent_server = GeventServer(
         self.application,
         port=self.r.port,
         hostname=self.r.bind,
         handler_class=RequestURIWSGIHandler,
         direct=True,
     )
示例#6
0
    def create_app(cls, port):
        """Build a default FrontEndApp and wrap it in a GeventServer.

        The server is created with hostname ``0.0.0.0`` and the given port.

        :param int port: The port passed to the GeventServer
        :return: The GeventServer wrapping a new FrontEndApp
        :rtype: GeventServer
        """
        return GeventServer(FrontEndApp(), port=port, hostname='0.0.0.0')
    def init_server(self, port, func, env_var_name=None):
        """Start a GeventServer for the app produced by ``func``.

        :param int port: port to serve on; a negative value means "do not start"
        :param func: zero-argument callable returning the app to serve
        :param str|None env_var_name: when set, the server's
            ``http://localhost:<port>`` URL is exported under this environment
            variable and echoed to stdout as ``NAME=URL``
        :return: the started GeventServer, or None if ``port`` is negative
        """
        if port < 0:
            return None

        server = GeventServer(func(), port, handler_class=ws_handler_class)

        if not env_var_name:
            return server

        # Publish the port actually bound, as reported by the server.
        server_url = 'http://localhost:{0}'.format(server.port)
        os.environ[env_var_name] = server_url
        print(env_var_name + '=' + server_url, flush=True)
        return server
示例#8
0
    def __init__(self, config_file=None, custom_config=None):
        """Build the front-end app: warcserver, optional proxy/recorder/autoindex,
        static handling, rewriter app, metadata cache and routes.

        :param str|None config_file: Path to the config file; falls back to
            ``./config.yaml`` when falsy
        :param dict|None custom_config: Dictionary containing additional configuration information
        """
        config_file = config_file or './config.yaml'
        self.handler = self.handle_request
        self.warcserver = WarcServer(config_file=config_file,
                                     custom_config=custom_config)
        # Recorder-related attributes start out disabled; init_recorder() below
        # is called with the 'recorder' config section.
        self.recorder = None
        self.recorder_path = None
        self.put_custom_record_path = None
        self.proxy_default_timestamp = None

        # All further settings are read from the config held by the warcserver.
        config = self.warcserver.config

        self.debug = config.get('debug', False)

        # NOTE(review): port=0 presumably lets the server pick a free port; the
        # resulting port is read back from warcserver_server.port below.
        self.warcserver_server = GeventServer(self.warcserver, port=0)

        self.proxy_prefix = None  # the URL prefix to be used for the collection with proxy mode (e.g. /coll/id_/)
        self.proxy_coll = None  # the name of the collection that has proxy mode enabled
        self.proxy_record = False  # indicate if proxy recording
        self.init_proxy(config)

        self.init_recorder(config.get('recorder'))

        self.init_autoindex(config.get('autoindex'))

        # The configured path uses '/' separators; convert to the OS separator.
        static_path = config.get('static_url_path',
                                 'pywb/static/').replace('/', os.path.sep)
        self.static_handler = StaticHandler(static_path)

        self.cdx_api_endpoint = config.get('cdx_api_endpoint', '/cdx')
        self.query_limit = config.get('query_limit')

        # Upstream paths are derived from the internal warcserver's port.
        upstream_paths = self.get_upstream_paths(self.warcserver_server.port)

        framed_replay = config.get('framed_replay', True)
        self.rewriterapp = self.REWRITER_APP_CLS(framed_replay,
                                                 config=config,
                                                 paths=upstream_paths)

        self.templates_dir = config.get('templates_dir', 'templates')
        self.static_dir = config.get('static_dir', 'static')

        # Metadata path template: <root_dir>/{coll}/metadata.yaml
        metadata_templ = os.path.join(self.warcserver.root_dir, '{coll}',
                                      'metadata.yaml')
        self.metadata_cache = MetadataCache(metadata_templ)

        # Routes are set up last, after the attributes above are in place.
        self._init_routes()
示例#9
0
    def setup_class(cls,
                    coll='pywb',
                    config_file='config_test.yaml',
                    recording=False):
        """Start a FrontEndApp in proxy mode behind a GeventServer for tests.

        ``config_file`` is resolved relative to the directory containing this
        test module; ``coll`` and ``recording`` are passed through as proxy
        options.
        """
        super(BaseTestProxy, cls).setup_class()

        tests_dir = os.path.dirname(os.path.realpath(__file__))
        config_file = os.path.join(tests_dir, config_file)

        cls.root_ca_file = os.path.join(cls.root_dir, 'pywb-ca-test.pem')

        proxy_settings = dict(
            ca_name='pywb test HTTPS Proxy CA',
            ca_file_cache=cls.root_ca_file,
            coll=coll,
            recording=recording,
        )
        custom_config = {'proxy': proxy_settings}

        cls.app = FrontEndApp(config_file=config_file,
                              custom_config=custom_config)

        cls.server = GeventServer(cls.app)
        cls.proxies = cls.proxy_dict(cls.server.port)
示例#10
0
    def __init__(self, config_file='./config.yaml', custom_config=None):
        """Build the front-end app: warcserver, optional proxy/recorder/autoindex,
        static handling, routes, rewriter app and metadata cache.

        :param str config_file: Path to the config file
        :param dict|None custom_config: Extra configuration passed to WarcServer
        """
        self.handler = self.handle_request
        self.warcserver = WarcServer(config_file=config_file,
                                     custom_config=custom_config)

        # All further settings are read from the config held by the warcserver.
        config = self.warcserver.config

        self.debug = config.get('debug', False)

        # NOTE(review): port=0 presumably lets the server pick a free port; the
        # resulting port is read back from warcserver_server.port below.
        self.warcserver_server = GeventServer(self.warcserver, port=0)

        self.init_proxy(config)

        self.init_recorder(config.get('recorder'))

        self.init_autoindex(config.get('autoindex'))

        # The configured path uses '/' separators; convert to the OS separator.
        static_path = config.get('static_url_path',
                                 'pywb/static/').replace('/', os.path.sep)
        self.static_handler = StaticHandler(static_path)

        self.cdx_api_endpoint = config.get('cdx_api_endpoint', '/cdx')

        self._init_routes()

        # Upstream paths are derived from the internal warcserver's port.
        upstream_paths = self.get_upstream_paths(self.warcserver_server.port)

        framed_replay = config.get('framed_replay', True)
        self.rewriterapp = RewriterApp(framed_replay,
                                       config=config,
                                       paths=upstream_paths)

        self.templates_dir = config.get('templates_dir', 'templates')
        self.static_dir = config.get('static_dir', 'static')

        # Metadata path template: <root_dir>/{coll}/metadata.yaml
        metadata_templ = os.path.join(self.warcserver.root_dir, '{coll}',
                                      'metadata.yaml')
        self.metadata_cache = MetadataCache(metadata_templ)
示例#11
0
    def setup_class(cls, *args, **kwargs):
        """Run a local httpbin server and redirect httpbin.org loads to it.

        After the parent setup, the httpbin app is served through a
        GeventServer and ``LiveIndexSource.get_load_url`` is patched so that
        URLs under the public httpbin hosts are rewritten to the local server.
        """
        super(HttpBinLiveTests, cls).setup_class(*args, **kwargs)

        from httpbin import app as httpbin_app
        httpbin_app.config.update(JSONIFY_PRETTYPRINT_REGULAR=True)
        cls.httpbin_server = GeventServer(httpbin_app)

        httpbin_local = 'http://localhost:' + str(
            cls.httpbin_server.port) + '/'

        # Applied in this order, one replacement after the other.
        remote_prefixes = (
            'http://test.httpbin.org/',
            'http://httpbin.org/',
            'https://httpbin.org/',
        )

        def get_load_url(self, params):
            for prefix in remote_prefixes:
                params['url'] = params['url'].replace(prefix, httpbin_local)
            return params['url']

        patch_target = (
            'pywb.warcserver.index.indexsource.LiveIndexSource.get_load_url')
        cls.indexmock = patch(patch_target, get_load_url)
        cls.indexmock.start()
示例#12
0
    def init_recorder(self, recorder_config):
        """Configure recording support from the ``recorder`` config entry.

        A falsy config disables recording: ``self.recorder`` and
        ``self.recorder_path`` are set to None and the method returns. A
        string config is used as the source collection name with all other
        options at their defaults; otherwise the collection is read from the
        config's ``'source_coll'`` entry.

        :param str|dict|None recorder_config: The configuration for the recorder app
        :rtype: None
        :raises Exception: if ``dedup_policy`` is set to anything other than
            ``none``, ``keep``, ``revisit`` or ``skip``
        """
        if not recorder_config:
            self.recorder = self.recorder_path = None
            return

        if isinstance(recorder_config, str):
            recorder_coll, recorder_config = recorder_config, {}
        else:
            recorder_coll = recorder_config['source_coll']

        # cache mode
        self.rec_cache_mode = recorder_config.get('cache', 'default')

        # Translate the configured policy name into a policy object;
        # 'none' is treated the same as no policy at all.
        policy_name = recorder_config.get('dedup_policy')
        if policy_name == 'none':
            policy_name = ''

        dedup_by_url = False
        if policy_name == 'keep':
            dupe_policy = WriteDupePolicy()
        elif policy_name == 'revisit':
            dupe_policy = WriteRevisitDupePolicy()
        elif policy_name == 'skip':
            dupe_policy = SkipDupePolicy()
            dedup_by_url = True
        elif policy_name:
            raise Exception(
                'Invalid option for dedup_policy: {0}'.format(policy_name))
        else:
            dupe_policy = policy_name

        dedup_index = None
        if dupe_policy:
            dedup_index = WritableRedisIndexer(
                redis_url=self.warcserver.dedup_index_url,
                dupe_policy=dupe_policy,
                rel_path_template=self.warcserver.root_dir + '/{coll}/archive')

        archive_paths = self.warcserver.archive_paths
        rollover_size = int(recorder_config.get('rollover_size', 1000000000))
        rollover_idle_secs = int(recorder_config.get('rollover_idle_secs', 600))
        warc_writer = MultiFileWARCWriter(
            archive_paths,
            max_size=rollover_size,
            max_idle_secs=rollover_idle_secs,
            filename_template=recorder_config.get('filename_template'),
            dedup_index=dedup_index,
            dedup_by_url=dedup_by_url)

        create_buff_func = None
        if dupe_policy:
            # The pending counter key is derived from the dedup index URL.
            pending_counter = self.warcserver.dedup_index_url.replace(
                ':cdxj', ':pending')
            pending_timeout = recorder_config.get('pending_timeout', 30)

            def create_buff_func(params, name):
                return RedisPendingCounterTempBuffer(
                    512 * 1024, pending_counter, params, name, pending_timeout)

        upstream = self.RECORD_SERVER % str(self.warcserver_server.port)
        self.recorder = RecorderApp(
            upstream,
            warc_writer,
            accept_colls=recorder_config.get('source_filter'),
            create_buff_func=create_buff_func)

        recorder_server = GeventServer(self.recorder, port=0)

        self.recorder_path = self.RECORD_API % (recorder_server.port,
                                                recorder_coll)

        # enable PUT of custom data as 'resource' records
        if recorder_config.get('enable_put_custom_record'):
            put_suffix = '&put_record={rec_type}&url={url}'
            self.put_custom_record_path = self.recorder_path + put_suffix
示例#13
0
文件: frontendapp.py 项目: nla/pywb
    def init_recorder(self, recorder_config):
        """Set up the recorder app, its WARC writer and optional dedup index.

        A falsy config disables recording: ``self.recorder`` and
        ``self.recorder_path`` are set to None and the method returns. A
        string config is used as the source collection name with all other
        options at their defaults; otherwise the collection is read from the
        config's ``'source_coll'`` entry.

        :param str|dict|None recorder_config: The configuration for the recorder app
        :rtype: None
        :raises Exception: if ``dedup_policy`` is set to anything other than
            ``none``, ``keep``, ``revisit`` or ``skip``
        """
        if not recorder_config:
            self.recorder = self.recorder_path = None
            return

        if isinstance(recorder_config, str):
            recorder_coll, recorder_config = recorder_config, {}
        else:
            recorder_coll = recorder_config['source_coll']

        # cache mode
        self.rec_cache_mode = recorder_config.get('cache', 'default')

        # 'none' is treated the same as leaving dedup_policy unset.
        policy_name = recorder_config.get('dedup_policy')
        if policy_name == 'none':
            policy_name = ''

        dedup_by_url = False
        if policy_name == 'keep':
            dupe_policy = WriteDupePolicy()
        elif policy_name == 'revisit':
            dupe_policy = WriteRevisitDupePolicy()
        elif policy_name == 'skip':
            dupe_policy = SkipDupePolicy()
            dedup_by_url = True
        elif policy_name:
            raise Exception(
                'Invalid option for dedup_policy: {0}'.format(policy_name))
        else:
            dupe_policy = policy_name

        dedup_index = None
        if dupe_policy:
            archive_template = self.warcserver.root_dir + '/{coll}/archive'
            dedup_index = WritableRedisIndexer(
                redis_url=self.warcserver.dedup_index_url,
                dupe_policy=dupe_policy,
                rel_path_template=archive_template)

        archive_paths = self.warcserver.archive_paths
        rollover_size = int(recorder_config.get('rollover_size', 1000000000))
        rollover_idle_secs = int(recorder_config.get('rollover_idle_secs', 600))
        warc_writer = MultiFileWARCWriter(
            archive_paths,
            max_size=rollover_size,
            max_idle_secs=rollover_idle_secs,
            filename_template=recorder_config.get('filename_template'),
            dedup_index=dedup_index,
            dedup_by_url=dedup_by_url)

        upstream = self.RECORD_SERVER % str(self.warcserver_server.port)
        self.recorder = RecorderApp(
            upstream,
            warc_writer,
            accept_colls=recorder_config.get('source_filter'))

        recorder_server = GeventServer(self.recorder, port=0)

        path_args = (recorder_server.port, recorder_coll)
        self.recorder_path = self.RECORD_API % path_args
示例#14
0
 def setup_class(cls):
     """Run the parent setup, then serve the live app through a GeventServer."""
     super(LiveServerTests, cls).setup_class()
     #cls.server = ServerThreadRunner(cls.make_live_app())
     live_app = cls.make_live_app()
     cls.server = GeventServer(live_app)
示例#15
0
 def create_app(cls, port):
     """Return a GeventServer wrapping a new FrontEndApp on ``port``, hostname 0.0.0.0."""
     return GeventServer(FrontEndApp(), port=port, hostname='0.0.0.0')
示例#16
0
 def setup_class(cls):
     """Set up the live rewriter tests and start the two helper servers.

     ``lint_app`` is switched off before the parent setup runs with
     ``config_test.yaml``; afterwards the header and cookie test apps are each
     served through their own GeventServer.
     """
     cls.lint_app = False
     super(TestLiveRewriter, cls).setup_class('config_test.yaml')

     helper_apps = (
         ('header_test_serv', header_test_server),
         ('cookie_test_serv', cookie_test_server),
     )
     for attr_name, wsgi_app in helper_apps:
         setattr(cls, attr_name, GeventServer(wsgi_app))