示例#1
0
 def make_live_app():
     """Create a ``BaseWarcServer`` exposing a single ``/live`` route.

     The route is served by a ``DefaultResourceHandler`` wrapping a
     ``SimpleAggregator`` whose only source, keyed ``'live'``, is a
     ``LiveIndexSource``.

     Returns:
         The configured ``BaseWarcServer`` instance.
     """
     server = BaseWarcServer()
     live_aggregator = SimpleAggregator({'live': LiveIndexSource()})
     live_handler = DefaultResourceHandler(live_aggregator)
     server.add_route('/live', live_handler)
     return server
    def test_live(self):
        """Query a ``LiveIndexSource`` for one URL and verify the result.

        The formatted result must match the expected key/timestamp/load_url
        line for ``http://example.com/`` and no errors may be reported.
        """
        target_url = 'http://example.com/'
        live_source = LiveIndexSource()
        res, errs = self.query_single_source(live_source, {'url': target_url})

        # The expected timestamp is taken only after the query has returned.
        expected_line = 'com,example)/ {0} http://example.com/'.format(
            timestamp_now())

        actual_line = key_ts_res(res, 'load_url')
        assert actual_line == expected_line
        assert errs == {}
示例#3
0
    def setup_class(cls):
        """Assemble the server under test and expose it as ``cls.testapp``.

        Registers the routes ``/live``, ``/many``, ``/cdx_api``, ``/posttest``,
        ``/fallback``, ``/seq``, ``/allredis``, ``/empty``, ``/invalid`` and
        ``/urlagnost`` on a fresh ``BaseWarcServer`` and wraps it in a
        ``webtest.TestApp``.
        """
        super(TestBaseWarcServer, cls).setup_class()

        # Live-only handler; reused below as the last step of '/fallback'.
        live_handler = DefaultResourceHandler(
            SimpleAggregator({'live': LiveIndexSource()}))
        server = BaseWarcServer()
        server.add_route('/live', live_handler)

        # Every entry of the ``sources`` mapping (defined outside this method).
        many_handler = DefaultResourceHandler(
            GeventTimeoutAggregator(sources), TEST_WARC_PATH)
        server.add_route('/many', many_handler)

        cdx_api_handler = DefaultResourceHandler(
            SimpleAggregator(ia_cdx), TEST_WARC_PATH)
        server.add_route('/cdx_api', cdx_api_handler)

        post_index = FileIndexSource(TEST_CDX_PATH + 'post-test.cdxj')
        post_handler = DefaultResourceHandler(
            SimpleAggregator({'post': post_index}), TEST_WARC_PATH)
        server.add_route('/posttest', post_handler)

        example_index = FileIndexSource(TEST_CDX_PATH + 'example2.cdxj')
        example_agg = SimpleAggregator({'example': example_index})
        example_handler = DefaultResourceHandler(example_agg, TEST_WARC_PATH)

        # Handler sequences: one ending in the live handler, one without it.
        fallback_seq = HandlerSeq([example_handler, post_handler, live_handler])
        server.add_route('/fallback', fallback_seq)

        server.add_route('/seq', HandlerSeq([example_handler, post_handler]))

        # Same index as the example handler, but WARC paths given as a
        # redis:// URL instead of TEST_WARC_PATH.
        redis_handler = DefaultResourceHandler(
            example_agg, 'redis://localhost/2/test:warc')
        server.add_route('/allredis', redis_handler)

        server.add_route('/empty', HandlerSeq([]))

        # NOTE(review): this route looks deliberately malformed (a list in
        # place of an aggregator, a plain string in place of a source) so the
        # error path can be exercised -- confirm against the tests using it.
        invalid_agg = SimpleAggregator({'invalid': 'should not be a callable'})
        server.add_route('/invalid', DefaultResourceHandler([invalid_agg]))

        url_agnost_index = FileIndexSource(
            TEST_CDX_PATH + 'url-agnost-example.cdxj')
        url_agnost_agg = SimpleAggregator({'url-agnost': url_agnost_index})
        url_agnost_handler = DefaultResourceHandler(
            url_agnost_agg, 'redis://localhost/2/test:{arg}:warc')
        server.add_route('/urlagnost', url_agnost_handler)

        cls.testapp = webtest.TestApp(server)
    def __init__(self):
        """Build the ``BaseWarcServer`` app and register all of its routes.

        Reads ``REDIS_BASE_URL`` (required) and ``CACHE_PROXY_URL`` (optional,
        defaulting to ``''``) from the environment and the Redis key templates
        from the config returned by ``load_wr_config()``. As a side effect the
        module-level ``PROXY_PREFIX`` is rebound to the cache proxy URL. The
        finished app is stored on ``self.app``.

        Raises:
            KeyError: if ``REDIS_BASE_URL`` is not set in the environment.
        """
        global PROXY_PREFIX

        init_logging()

        config = load_wr_config()

        server = BaseWarcServer(debug=True)

        redis_prefix = os.environ['REDIS_BASE_URL'] + '/'

        rec_index_url = redis_prefix + config['cdxj_key_templ']
        coll_index_url = redis_prefix + config['coll_cdxj_key_templ']
        warc_key_url = redis_prefix + config['warc_key_templ']

        resolver = RedisResolver(
            redis_url=warc_key_url,
            member_key_templ=config['rec_list_key_templ'])
        # The resolver's Redis client is shared with both index sources below.
        shared_redis = resolver.redis
        resolvers = [resolver]

        proxy_url = os.environ.get('CACHE_PROXY_URL', '')
        PROXY_PREFIX = proxy_url

        query_timeout = 20.0

        def _handler_for(aggregator):
            # Every handler uses the same resolver list and cache proxy URL.
            return DefaultResourceHandler(aggregator, resolvers, proxy_url)

        rec_index = RedisIndexSource(
            timeout=query_timeout, redis_url=rec_index_url, redis=shared_redis)

        coll_index = RedisIndexSource(
            timeout=query_timeout, redis_url=coll_index_url, redis=shared_redis)

        live_handler = _handler_for(
            SimpleAggregator({'live': LiveIndexSource()}))

        # Extractable archives (all available)
        loader = WAMSourceLoader(
            memento_cls=ProxyMementoIndexSource,
            remote_cls=ProxyRemoteIndexSource,
            wb_memento_cls=ProxyWBMementoIndexSource)
        all_archives = loader.sources

        # Extract source: query every extractable archive.
        extract_primary = _handler_for(
            GeventTimeoutAggregator(all_archives, timeout=query_timeout))

        # Patch archives: the filtered fallback archives plus the live web.
        # NOTE: the mapping returned by filter_archives() is extended in place
        # (no copy is made) when the 'live' entry is added.
        patch_archives = self.filter_archives(
            all_archives, config['patch_archives_index'])
        patch_archives['live'] = LiveIndexSource()

        inverted_agg = GeventTimeoutAggregator(
            patch_archives,
            timeout=query_timeout,
            sources_key='inv_sources',
            invert_sources=True)
        extract_other = _handler_for(inverted_agg)

        patch_handler = _handler_for(
            GeventTimeoutAggregator(patch_archives, timeout=query_timeout))

        # Single recording replay
        replay_rec = _handler_for(SimpleAggregator({'local': rec_index}))

        # Collection replay
        replay_coll = _handler_for(SimpleAggregator({'local': coll_index}))

        server.add_route('/live', live_handler)
        server.add_route(
            '/extract',
            HandlerSeq([extract_primary, extract_other, replay_rec]))
        server.add_route('/replay', replay_rec)
        server.add_route('/replay-coll', replay_coll)
        server.add_route('/patch', HandlerSeq([replay_coll, patch_handler]))

        self.app = server
示例#5
0
from pywb.warcserver.index.indexsource import MementoIndexSource, FileIndexSource, LiveIndexSource
from pywb.warcserver.index.indexsource import RemoteIndexSource

from pywb.warcserver.index.aggregator import GeventTimeoutAggregator, SimpleAggregator
from pywb.warcserver.index.aggregator import DirectoryIndexSource

from pywb.warcserver.basewarcserver import BaseWarcServer
from pywb.utils.memento import MementoUtils


# Index sources keyed by the short name used to identify each one.
sources = {
    # Directory-based index rooted at the test CDX path.
    'local':
    DirectoryIndexSource(TEST_CDX_PATH),
    # Memento sources built from their timegate URLs.
    'ia':
    MementoIndexSource.from_timegate_url('http://web.archive.org/web/'),
    'rhiz':
    MementoIndexSource.from_timegate_url(
        'https://webenact.rhizome.org/vvork/'),
    # Live index source.
    'live':
    LiveIndexSource(),
}

# Single remote source keyed 'ia-cdx'.
# NOTE(review): the first argument looks like a CDX query URL template and
# the second a replay URL template -- confirm against RemoteIndexSource.
ia_cdx = {
    'ia-cdx':
    RemoteIndexSource(
        'http://web.archive.org/cdx?url={url}&closest={closest}&sort=closest',
        'http://web.archive.org/web/{timestamp}id_/{url}'),
}




class TestBaseWarcServer(HttpBinLiveTests, MementoOverrideTests, FakeRedisTests, BaseTestClass):
    """Test class for ``BaseWarcServer`` built on four base/mixin classes.

    NOTE(review): what each base class contributes is not visible here and is
    only suggested by its name -- confirm against their definitions.
    """

    @classmethod
    def setup_class(cls):
        """Delegate to the parent classes' ``setup_class`` via the MRO."""
        super(TestBaseWarcServer, cls).setup_class()
示例#6
0
from pywb.warcserver.index.aggregator import GeventTimeoutAggregator, SimpleAggregator
from pywb.warcserver.index.aggregator import DirectoryIndexSource

from pywb.warcserver.basewarcserver import BaseWarcServer
from pywb.utils.memento import MementoUtils

# Index sources keyed by the short name used to identify each one.
sources = {
    # Directory-based index rooted at the test CDX path.
    'local': DirectoryIndexSource(TEST_CDX_PATH),
    # Memento sources built from their timegate URLs.
    'ia': MementoIndexSource.from_timegate_url('http://web.archive.org/web/'),
    'rhiz': MementoIndexSource.from_timegate_url(
        'http://webenact.rhizome.org/vvork/', path='*'),
    # Live index source.
    'live': LiveIndexSource(),
}

# Single remote source keyed 'ia-cdx'.
# NOTE(review): the first argument looks like a CDX query URL template and
# the second a replay URL template -- confirm against RemoteIndexSource.
ia_cdx = {
    'ia-cdx': RemoteIndexSource(
        'http://web.archive.org/cdx?url={url}&closest={closest}&sort=closest',
        'http://web.archive.org/web/{timestamp}id_/{url}'),
}


class TestBaseWarcServer(HttpBinLiveTests, MementoOverrideTests,
                         FakeRedisTests, BaseTestClass):
    @classmethod
    def setup_class(cls):
        super(TestBaseWarcServer, cls).setup_class()