def make_live_app():
    """Return a BaseWarcServer that exposes a single '/live' route.

    The route is backed by a DefaultResourceHandler wrapping a
    SimpleAggregator whose only entry, keyed 'live', is a LiveIndexSource.
    """
    server = BaseWarcServer()

    live_aggregator = SimpleAggregator({'live': LiveIndexSource()})
    live_handler = DefaultResourceHandler(live_aggregator)
    server.add_route('/live', live_handler)

    return server
def test_live(self):
    """Query a LiveIndexSource for http://example.com/ and check the result.

    The expected line is built from timestamp_now() after the query has
    returned, and the query must report an empty error dict.
    """
    target_url = 'http://example.com/'
    live_source = LiveIndexSource()

    result, errors = self.query_single_source(live_source, {'url': target_url})

    expected_line = 'com,example)/ {0} http://example.com/'.format(timestamp_now())

    assert key_ts_res(result, 'load_url') == expected_line
    assert errors == {}
def setup_class(cls):
    """Build the BaseWarcServer under test and store it as ``cls.testapp``.

    Routes registered, in order: /live, /many, /cdx_api, /posttest,
    /fallback, /seq, /allredis, /empty, /invalid, /urlagnost.  The finished
    app is wrapped in a webtest.TestApp.
    """
    super(TestBaseWarcServer, cls).setup_class()

    # Live handler: used on its own and as the last step of '/fallback'.
    live_aggregator = SimpleAggregator({'live': LiveIndexSource()})
    live_handler = DefaultResourceHandler(live_aggregator)

    server = BaseWarcServer()
    server.add_route('/live', live_handler)

    # All module-level ``sources`` queried through one aggregator.
    many_aggregator = GeventTimeoutAggregator(sources)
    many_handler = DefaultResourceHandler(many_aggregator, TEST_WARC_PATH)
    server.add_route('/many', many_handler)

    cdx_api_handler = DefaultResourceHandler(SimpleAggregator(ia_cdx),
                                             TEST_WARC_PATH)
    server.add_route('/cdx_api', cdx_api_handler)

    # File-backed index: post-test.cdxj
    post_index = FileIndexSource(TEST_CDX_PATH + 'post-test.cdxj')
    post_aggregator = SimpleAggregator({'post': post_index})
    post_handler = DefaultResourceHandler(post_aggregator, TEST_WARC_PATH)
    server.add_route('/posttest', post_handler)

    # File-backed index: example2.cdxj (no route of its own; reused below)
    example_index = FileIndexSource(TEST_CDX_PATH + 'example2.cdxj')
    example_aggregator = SimpleAggregator({'example': example_index})
    example_handler = DefaultResourceHandler(example_aggregator, TEST_WARC_PATH)

    # Handler sequences built from the handlers above.
    fallback_seq = HandlerSeq([example_handler, post_handler, live_handler])
    server.add_route('/fallback', fallback_seq)

    plain_seq = HandlerSeq([example_handler, post_handler])
    server.add_route('/seq', plain_seq)

    # Same example index, but with a redis URL in place of TEST_WARC_PATH.
    redis_handler = DefaultResourceHandler(example_aggregator,
                                           'redis://localhost/2/test:warc')
    server.add_route('/allredis', redis_handler)

    server.add_route('/empty', HandlerSeq([]))

    # NOTE(review): a plain string stands in for an index source here, and
    # the aggregator is passed wrapped in a list -- presumably to exercise
    # error handling; confirm against the tests that hit '/invalid'.
    invalid_aggregator = SimpleAggregator({'invalid': 'should not be a callable'})
    server.add_route('/invalid', DefaultResourceHandler([invalid_aggregator]))

    url_agnost_index = FileIndexSource(TEST_CDX_PATH + 'url-agnost-example.cdxj')
    url_agnost_aggregator = SimpleAggregator({'url-agnost': url_agnost_index})
    url_agnost_handler = DefaultResourceHandler(
        url_agnost_aggregator, 'redis://localhost/2/test:{arg}:warc')
    server.add_route('/urlagnost', url_agnost_handler)

    cls.testapp = webtest.TestApp(server)
def __init__(self):
    """Assemble the BaseWarcServer and store it on ``self.app``.

    Environment variables read:
        REDIS_BASE_URL   -- required (KeyError if unset); prefix for the
                            redis key templates taken from the config.
        CACHE_PROXY_URL  -- optional, defaults to ''; handed to every
                            handler and written to the module-level
                            PROXY_PREFIX.

    Routes registered: /live, /extract, /replay, /replay-coll, /patch.
    """
    global PROXY_PREFIX

    init_logging()

    wr_config = load_wr_config()

    server = BaseWarcServer(debug=True)

    # Redis URLs: base URL + key template from the config.
    base_url = os.environ['REDIS_BASE_URL'] + '/'
    rec_index_url = base_url + wr_config['cdxj_key_templ']
    coll_index_url = base_url + wr_config['coll_cdxj_key_templ']
    warc_key_url = base_url + wr_config['warc_key_templ']
    rec_list_templ = wr_config['rec_list_key_templ']

    resolver = RedisResolver(redis_url=warc_key_url,
                             member_key_templ=rec_list_templ)
    # The resolver's redis client is reused by both redis index sources.
    shared_redis = resolver.redis
    resolvers = [resolver]

    proxy_url = os.environ.get('CACHE_PROXY_URL', '')
    PROXY_PREFIX = proxy_url

    query_timeout = 20.0

    def handler_for(aggregator):
        # Every handler shares the same resolver list and cache proxy URL.
        return DefaultResourceHandler(aggregator, resolvers, proxy_url)

    rec_index = RedisIndexSource(timeout=query_timeout,
                                 redis_url=rec_index_url,
                                 redis=shared_redis)

    coll_index = RedisIndexSource(timeout=query_timeout,
                                  redis_url=coll_index_url,
                                  redis=shared_redis)

    live_handler = handler_for(SimpleAggregator({'live': LiveIndexSource()}))

    # Extractable archives (all available)
    loader = WAMSourceLoader(memento_cls=ProxyMementoIndexSource,
                             remote_cls=ProxyRemoteIndexSource,
                             wb_memento_cls=ProxyWBMementoIndexSource)
    all_archives = loader.sources

    # Extract source: every extractable archive
    primary_aggregator = GeventTimeoutAggregator(all_archives,
                                                 timeout=query_timeout)
    primary_extract_handler = handler_for(primary_aggregator)

    # Patch fallback archives + live.
    # NOTE: no copy is made -- the dict returned by filter_archives() is
    # itself extended with the 'live' entry.
    patch_sources = self.filter_archives(all_archives,
                                         wr_config['patch_archives_index'])
    patch_sources['live'] = LiveIndexSource()

    inverted_aggregator = GeventTimeoutAggregator(patch_sources,
                                                  timeout=query_timeout,
                                                  sources_key='inv_sources',
                                                  invert_sources=True)
    other_extract_handler = handler_for(inverted_aggregator)

    patch_aggregator = GeventTimeoutAggregator(patch_sources,
                                               timeout=query_timeout)
    patch_handler = handler_for(patch_aggregator)

    # Single recording replay
    rec_replay_handler = handler_for(SimpleAggregator({'local': rec_index}))

    # Collection replay
    coll_replay_handler = handler_for(SimpleAggregator({'local': coll_index}))

    server.add_route('/live', live_handler)
    server.add_route('/extract', HandlerSeq([primary_extract_handler,
                                             other_extract_handler,
                                             rec_replay_handler]))
    server.add_route('/replay', rec_replay_handler)
    server.add_route('/replay-coll', coll_replay_handler)
    server.add_route('/patch', HandlerSeq([coll_replay_handler, patch_handler]))

    self.app = server
from pywb.warcserver.index.indexsource import (
    MementoIndexSource,
    FileIndexSource,
    LiveIndexSource,
    RemoteIndexSource,
)
from pywb.warcserver.index.aggregator import (
    GeventTimeoutAggregator,
    SimpleAggregator,
    DirectoryIndexSource,
)
from pywb.warcserver.basewarcserver import BaseWarcServer
from pywb.utils.memento import MementoUtils


# Index sources keyed by name: a directory index rooted at TEST_CDX_PATH,
# two Memento sources created from timegate URLs, and a live source.
sources = {
    'local': DirectoryIndexSource(TEST_CDX_PATH),
    'ia': MementoIndexSource.from_timegate_url(
        'http://web.archive.org/web/'),
    'rhiz': MementoIndexSource.from_timegate_url(
        'https://webenact.rhizome.org/vvork/'),
    'live': LiveIndexSource(),
}

# Single remote source against web.archive.org.
# NOTE(review): arguments look like (CDX query URL template, replay URL
# template) -- confirm against RemoteIndexSource.
ia_cdx = {
    'ia-cdx': RemoteIndexSource(
        'http://web.archive.org/cdx?url={url}&closest={closest}&sort=closest',
        'http://web.archive.org/web/{timestamp}id_/{url}'),
}


class TestBaseWarcServer(HttpBinLiveTests, MementoOverrideTests, FakeRedisTests, BaseTestClass):
    @classmethod
    def setup_class(cls):
        super(TestBaseWarcServer, cls).setup_class()
from pywb.warcserver.index.aggregator import (
    GeventTimeoutAggregator,
    SimpleAggregator,
    DirectoryIndexSource,
)
from pywb.warcserver.basewarcserver import BaseWarcServer
from pywb.utils.memento import MementoUtils


# Index sources keyed by name: a directory index rooted at TEST_CDX_PATH,
# two Memento sources created from timegate URLs (the rhizome one with
# path='*'), and a live source.
sources = {
    'local': DirectoryIndexSource(TEST_CDX_PATH),
    'ia': MementoIndexSource.from_timegate_url(
        'http://web.archive.org/web/'),
    'rhiz': MementoIndexSource.from_timegate_url(
        'http://webenact.rhizome.org/vvork/', path='*'),
    'live': LiveIndexSource(),
}

# Single remote source against web.archive.org.
# NOTE(review): arguments look like (CDX query URL template, replay URL
# template) -- confirm against RemoteIndexSource.
ia_cdx = {
    'ia-cdx': RemoteIndexSource(
        'http://web.archive.org/cdx?url={url}&closest={closest}&sort=closest',
        'http://web.archive.org/web/{timestamp}id_/{url}'),
}


class TestBaseWarcServer(HttpBinLiveTests, MementoOverrideTests, FakeRedisTests, BaseTestClass):
    @classmethod
    def setup_class(cls):
        super(TestBaseWarcServer, cls).setup_class()