def init_recorder(self, recorder_config):
    """Set up the recorder app from ``recorder_config``.

    A falsy config is a no-op apart from resetting ``self.recorder`` and
    ``self.recorder_path`` to ``None``. A string config is used as the
    source collection name with default settings; any other config is
    treated as a mapping whose ``'source_coll'`` key names the collection.
    """
    if not recorder_config:
        self.recorder = None
        self.recorder_path = None
        return

    if isinstance(recorder_config, str):
        source_coll, settings = recorder_config, {}
    else:
        source_coll, settings = recorder_config['source_coll'], recorder_config

    # TODO: support dedup
    writer = MultiFileWARCWriter(
        self.warcserver.archive_paths,
        max_size=int(settings.get('rollover_size', 1000000000)),
        max_idle_secs=int(settings.get('rollover_idle_secs', 600)),
        filename_template=settings.get('filename_template'),
        dedup_index=None,
    )

    upstream_url = self.RECORD_SERVER % str(self.warcserver_server.port)
    self.recorder = RecorderApp(
        upstream_url,
        writer,
        accept_colls=settings.get('source_filter'),
    )

    # The recorder gets its own server; recorder_path is built from
    # whatever port that server reports.
    server = GeventServer(self.recorder, port=0)
    self.recorder_path = self.RECORD_API % (server.port, source_coll)
def init_recorder(self, recorder_config):
    """Set up the recorder app from ``recorder_config``.

    A falsy config only resets ``self.recorder`` and ``self.recorder_path``
    to ``None``. A string config is used as the source collection name with
    default settings; otherwise the collection name is read from the
    ``'source_coll'`` key of the config.
    """
    if not recorder_config:
        self.recorder = None
        self.recorder_path = None
        return

    if isinstance(recorder_config, str):
        source_coll, settings = recorder_config, {}
    else:
        source_coll, settings = recorder_config['source_coll'], recorder_config

    # TODO: support dedup
    writer = MultiFileWARCWriter(
        self.warcserver.archive_paths,
        max_size=int(settings.get('rollover_size', 1000000000)),
        max_idle_secs=int(settings.get('rollover_idle_secs', 600)),
        filename_template=settings.get('filename_template'),
        dedup_index=None,
    )

    upstream_url = self.RECORD_SERVER % str(self.warcserver_server.port)
    self.recorder = RecorderApp(upstream_url, writer)

    # The recorder gets its own server; recorder_path is built from
    # whatever port that server reports.
    server = GeventServer(self.recorder, port=0)
    self.recorder_path = self.RECORD_API % (server.port, source_coll)
def setup_class(cls, coll='pywb', config_file='config_test.yaml', recording=False,
                proxy_opts=None, config_opts=None):
    """Create the proxy-mode test app and the server that fronts it.

    :param str coll: value stored under the ``'coll'`` proxy option
    :param str config_file: config file name, resolved relative to the
        directory containing this test module
    :param bool recording: value stored under the ``'recording'`` proxy option
    :param dict|None proxy_opts: extra proxy options merged over the defaults
    :param dict|None config_opts: extra top-level custom config entries; the
        ``'proxy'`` key is always overwritten with the assembled proxy options
    """
    super(BaseTestProxy, cls).setup_class()

    config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), config_file)

    cls.root_ca_file = os.path.join(cls.root_dir, 'pywb-ca-test.pem')

    opts = {
        'ca_name': 'pywb test HTTPS Proxy CA',
        'ca_file_cache': cls.root_ca_file,
        'coll': coll,
        'recording': recording,
    }
    opts.update(proxy_opts or {})

    # Work on a copy: writing 'proxy' into the argument itself would leak
    # state into the caller's dict (or, previously, into a shared mutable
    # default) and carry it over to later setup_class calls.
    custom_config = dict(config_opts or {})
    custom_config['proxy'] = opts

    cls.app = FrontEndApp(config_file=config_file, custom_config=custom_config)

    cls.server = GeventServer(cls.app, handler_class=RequestURIWSGIHandler)
    cls.proxies = cls.proxy_dict(cls.server.port)
def run_gevent(self):
    """Log the configured port and create a GeventServer for ``self.application``.

    The server is created with the port and bind address taken from
    ``self.r``, using ``RequestURIWSGIHandler`` and ``direct=True``.
    """
    from pywb.utils.geventserver import GeventServer, RequestURIWSGIHandler

    args = self.r
    logging.info('Starting Gevent Server on ' + str(args.port))

    server = GeventServer(
        self.application,
        port=args.port,
        hostname=args.bind,
        handler_class=RequestURIWSGIHandler,
        direct=True,
    )
def run_gevent(self):
    """Create the gevent server that runs ``self.application``.

    The port and bind address come from ``self.r``; the port is logged
    before the server is created with ``direct=True``.
    """
    from pywb.utils.geventserver import GeventServer, RequestURIWSGIHandler

    args = self.r
    logging.info('Starting Gevent Server on ' + str(args.port))

    server = GeventServer(
        self.application,
        port=args.port,
        hostname=args.bind,
        handler_class=RequestURIWSGIHandler,
        direct=True,
    )
def create_app(cls, port):
    """Build a default FrontEndApp and serve it on ``0.0.0.0`` at ``port``.

    :param int port: The port the FrontEndApp server is to listen on
    :return: The GeventServer wrapping the new FrontEndApp instance
    :rtype: GeventServer
    """
    frontend = FrontEndApp()
    return GeventServer(frontend, port=port, hostname='0.0.0.0')
def init_server(self, port, func, env_var_name=None):
    """Start a GeventServer for the app produced by ``func`` unless ``port`` is negative.

    :param int port: port passed to GeventServer; a negative value disables
        the server and makes this method return ``None``
    :param func: zero-argument callable returning the application to serve
    :param str|None env_var_name: if truthy, name of the environment variable
        that receives ``http://localhost:<port>`` for the started server; the
        resulting ``NAME=value`` pair is also printed
    :return: the created GeventServer, or ``None`` when ``port`` is negative
    """
    if port < 0:
        return None

    server = GeventServer(func(), port, handler_class=ws_handler_class)
    if not env_var_name:
        return server

    os.environ[env_var_name] = 'http://localhost:{0}'.format(server.port)
    announcement = env_var_name + '=' + os.environ[env_var_name]
    print(announcement, flush=True)
    return server
def __init__(self, config_file=None, custom_config=None):
    """Build the front-end app around a WarcServer and its GeventServer.

    :param str|None config_file: Path to the config file; a falsy value
        falls back to ``./config.yaml``
    :param dict|None custom_config: Dictionary containing additional
        configuration information
    """
    if not config_file:
        config_file = './config.yaml'

    self.handler = self.handle_request
    self.warcserver = WarcServer(config_file=config_file,
                                 custom_config=custom_config)

    # Defaults; the init_* calls below may replace them.
    self.recorder = None
    self.recorder_path = None
    self.put_custom_record_path = None
    self.proxy_default_timestamp = None

    settings = self.warcserver.config

    self.debug = settings.get('debug', False)

    self.warcserver_server = GeventServer(self.warcserver, port=0)

    # the URL prefix to be used for the collection with proxy mode (e.g. /coll/id_/)
    self.proxy_prefix = None
    # the name of the collection that has proxy mode enabled
    self.proxy_coll = None
    # indicate if proxy recording
    self.proxy_record = False

    self.init_proxy(settings)
    self.init_recorder(settings.get('recorder'))
    self.init_autoindex(settings.get('autoindex'))

    # The configured static path uses '/'; convert it to the OS separator.
    static_url_path = settings.get('static_url_path', 'pywb/static/')
    self.static_handler = StaticHandler(static_url_path.replace('/', os.path.sep))

    self.cdx_api_endpoint = settings.get('cdx_api_endpoint', '/cdx')
    self.query_limit = settings.get('query_limit')

    paths = self.get_upstream_paths(self.warcserver_server.port)
    use_framed_replay = settings.get('framed_replay', True)
    self.rewriterapp = self.REWRITER_APP_CLS(use_framed_replay,
                                             config=settings,
                                             paths=paths)

    self.templates_dir = settings.get('templates_dir', 'templates')
    self.static_dir = settings.get('static_dir', 'static')

    metadata_template = os.path.join(self.warcserver.root_dir, '{coll}', 'metadata.yaml')
    self.metadata_cache = MetadataCache(metadata_template)

    self._init_routes()
def setup_class(cls, coll='pywb', config_file='config_test.yaml', recording=False):
    """Create the proxy-mode test app and the server that fronts it.

    ``config_file`` is resolved relative to the directory containing this
    test module; ``coll`` and ``recording`` are passed through as proxy
    options.
    """
    super(BaseTestProxy, cls).setup_class()

    here = os.path.dirname(os.path.realpath(__file__))
    config_file = os.path.join(here, config_file)

    cls.root_ca_file = os.path.join(cls.root_dir, 'pywb-ca-test.pem')

    proxy_settings = dict(
        ca_name='pywb test HTTPS Proxy CA',
        ca_file_cache=cls.root_ca_file,
        coll=coll,
        recording=recording,
    )

    cls.app = FrontEndApp(config_file=config_file,
                          custom_config={'proxy': proxy_settings})

    cls.server = GeventServer(cls.app)
    cls.proxies = cls.proxy_dict(cls.server.port)
def __init__(self, config_file='./config.yaml', custom_config=None):
    """Build the front-end app around a WarcServer and its GeventServer.

    :param str config_file: Path to the config file
    :param dict|None custom_config: Dictionary containing additional
        configuration information
    """
    self.handler = self.handle_request
    self.warcserver = WarcServer(config_file=config_file,
                                 custom_config=custom_config)

    settings = self.warcserver.config

    self.debug = settings.get('debug', False)

    self.warcserver_server = GeventServer(self.warcserver, port=0)

    self.init_proxy(settings)
    self.init_recorder(settings.get('recorder'))
    self.init_autoindex(settings.get('autoindex'))

    # The configured static path uses '/'; convert it to the OS separator.
    static_url_path = settings.get('static_url_path', 'pywb/static/')
    self.static_handler = StaticHandler(static_url_path.replace('/', os.path.sep))

    self.cdx_api_endpoint = settings.get('cdx_api_endpoint', '/cdx')

    # NOTE(review): routes are initialised before rewriterapp and the
    # metadata cache exist; this order is kept exactly as found.
    self._init_routes()

    paths = self.get_upstream_paths(self.warcserver_server.port)
    use_framed_replay = settings.get('framed_replay', True)
    self.rewriterapp = RewriterApp(use_framed_replay,
                                   config=settings,
                                   paths=paths)

    self.templates_dir = settings.get('templates_dir', 'templates')
    self.static_dir = settings.get('static_dir', 'static')

    metadata_template = os.path.join(self.warcserver.root_dir, '{coll}', 'metadata.yaml')
    self.metadata_cache = MetadataCache(metadata_template)
def setup_class(cls, *args, **kwargs):
    """Start a local httpbin server and redirect httpbin.org loads to it.

    After the parent ``setup_class`` runs, the httpbin app is served by a
    GeventServer and ``LiveIndexSource.get_load_url`` is patched so that
    the httpbin.org URL prefixes are rewritten to the local server.
    """
    super(HttpBinLiveTests, cls).setup_class(*args, **kwargs)

    from httpbin import app as httpbin_app
    httpbin_app.config.update(JSONIFY_PRETTYPRINT_REGULAR=True)
    cls.httpbin_server = GeventServer(httpbin_app)

    local_base = 'http://localhost:' + str(cls.httpbin_server.port) + '/'

    # Rewritten in this order, one prefix at a time.
    remote_bases = (
        'http://test.httpbin.org/',
        'http://httpbin.org/',
        'https://httpbin.org/',
    )

    def get_load_url(self, params):
        for remote_base in remote_bases:
            params['url'] = params['url'].replace(remote_base, local_base)
        return params['url']

    cls.indexmock = patch(
        'pywb.warcserver.index.indexsource.LiveIndexSource.get_load_url',
        get_load_url)
    cls.indexmock.start()
def init_recorder(self, recorder_config):
    """Initialize the recording functionality of pywb.

    If ``recorder_config`` is falsy, ``self.recorder`` and
    ``self.recorder_path`` are reset to ``None`` and nothing else is done.

    :param str|dict|None recorder_config: The configuration for the recorder
        app; a string is used as the source collection name with default
        settings
    :rtype: None
    :raises Exception: if ``dedup_policy`` is set to an unrecognized value
    """
    if not recorder_config:
        self.recorder = None
        self.recorder_path = None
        return

    if isinstance(recorder_config, str):
        source_coll, settings = recorder_config, {}
    else:
        source_coll, settings = recorder_config['source_coll'], recorder_config

    # cache mode
    self.rec_cache_mode = settings.get('cache', 'default')

    # Turn the configured policy name into a policy object. 'none' is
    # normalised to an empty string, so it is handled like an unset policy.
    dupe_policy = settings.get('dedup_policy')
    url_dedup = False

    if dupe_policy == 'none':
        dupe_policy = ''

    if dupe_policy == 'keep':
        dupe_policy = WriteDupePolicy()
    elif dupe_policy == 'revisit':
        dupe_policy = WriteRevisitDupePolicy()
    elif dupe_policy == 'skip':
        dupe_policy = SkipDupePolicy()
        url_dedup = True
    elif dupe_policy:
        raise Exception('Invalid option for dedup_policy: {0}'.format(dupe_policy))

    if dupe_policy:
        dedup_index = WritableRedisIndexer(
            redis_url=self.warcserver.dedup_index_url,
            dupe_policy=dupe_policy,
            rel_path_template=self.warcserver.root_dir + '/{coll}/archive')
    else:
        dedup_index = None

    writer = MultiFileWARCWriter(
        self.warcserver.archive_paths,
        max_size=int(settings.get('rollover_size', 1000000000)),
        max_idle_secs=int(settings.get('rollover_idle_secs', 600)),
        filename_template=settings.get('filename_template'),
        dedup_index=dedup_index,
        dedup_by_url=url_dedup,
    )

    if dupe_policy:
        # The pending-counter key is derived from the dedup index URL.
        pending_key = self.warcserver.dedup_index_url.replace(':cdxj', ':pending')
        pending_timeout = settings.get('pending_timeout', 30)

        def create_buff_func(params, name):
            return RedisPendingCounterTempBuffer(
                512 * 1024, pending_key, params, name, pending_timeout)
    else:
        create_buff_func = None

    upstream_url = self.RECORD_SERVER % str(self.warcserver_server.port)
    self.recorder = RecorderApp(
        upstream_url,
        writer,
        accept_colls=settings.get('source_filter'),
        create_buff_func=create_buff_func,
    )

    server = GeventServer(self.recorder, port=0)
    self.recorder_path = self.RECORD_API % (server.port, source_coll)

    # enable PUT of custom data as 'resource' records
    if settings.get('enable_put_custom_record'):
        self.put_custom_record_path = self.recorder_path + '&put_record={rec_type}&url={url}'
def init_recorder(self, recorder_config):
    """Initialize the recording functionality of pywb.

    If ``recorder_config`` is falsy, ``self.recorder`` and
    ``self.recorder_path`` are reset to ``None`` and nothing else is done.

    :param str|dict|None recorder_config: The configuration for the recorder
        app; a string is used as the source collection name with default
        settings
    :rtype: None
    :raises Exception: if ``dedup_policy`` is set to an unrecognized value
    """
    if not recorder_config:
        self.recorder = None
        self.recorder_path = None
        return

    if isinstance(recorder_config, str):
        source_coll, settings = recorder_config, {}
    else:
        source_coll, settings = recorder_config['source_coll'], recorder_config

    # cache mode
    self.rec_cache_mode = settings.get('cache', 'default')

    # Turn the configured policy name into a policy object. 'none' is
    # normalised to an empty string, so it is handled like an unset policy.
    dupe_policy = settings.get('dedup_policy')
    url_dedup = False

    if dupe_policy == 'none':
        dupe_policy = ''

    if dupe_policy == 'keep':
        dupe_policy = WriteDupePolicy()
    elif dupe_policy == 'revisit':
        dupe_policy = WriteRevisitDupePolicy()
    elif dupe_policy == 'skip':
        dupe_policy = SkipDupePolicy()
        url_dedup = True
    elif dupe_policy:
        raise Exception('Invalid option for dedup_policy: {0}'.format(dupe_policy))

    if dupe_policy:
        dedup_index = WritableRedisIndexer(
            redis_url=self.warcserver.dedup_index_url,
            dupe_policy=dupe_policy,
            rel_path_template=self.warcserver.root_dir + '/{coll}/archive')
    else:
        dedup_index = None

    writer = MultiFileWARCWriter(
        self.warcserver.archive_paths,
        max_size=int(settings.get('rollover_size', 1000000000)),
        max_idle_secs=int(settings.get('rollover_idle_secs', 600)),
        filename_template=settings.get('filename_template'),
        dedup_index=dedup_index,
        dedup_by_url=url_dedup,
    )

    upstream_url = self.RECORD_SERVER % str(self.warcserver_server.port)
    self.recorder = RecorderApp(
        upstream_url,
        writer,
        accept_colls=settings.get('source_filter'),
    )

    server = GeventServer(self.recorder, port=0)
    self.recorder_path = self.RECORD_API % (server.port, source_coll)
def setup_class(cls):
    """Run the parent setup, then serve the app from ``make_live_app`` with GeventServer."""
    super(LiveServerTests, cls).setup_class()

    # An earlier variant wrapped the app in ServerThreadRunner instead.
    live_app = cls.make_live_app()
    cls.server = GeventServer(live_app)
def create_app(cls, port):
    """Build a default FrontEndApp and serve it on ``0.0.0.0`` at ``port``.

    Returns the GeventServer wrapping the new app.
    """
    frontend = FrontEndApp()
    return GeventServer(frontend, port=port, hostname='0.0.0.0')
def setup_class(cls):
    """Set up the live-rewriter tests and start the two helper servers.

    ``lint_app`` is set to ``False`` before the parent setup runs with
    ``config_test.yaml``; the header and cookie test apps are then each
    wrapped in their own GeventServer.
    """
    cls.lint_app = False
    super(TestLiveRewriter, cls).setup_class('config_test.yaml')

    helper_apps = (
        ('header_test_serv', header_test_server),
        ('cookie_test_serv', cookie_test_server),
    )
    for attr_name, wsgi_app in helper_apps:
        setattr(cls, attr_name, GeventServer(wsgi_app))