Exemplo n.º 1
0
def create_cdx_server_app(passed_config):
    """
    Create a cdx server api-only app
    For each collection, create a /<coll>-cdx access point
    which follows the cdx api

    :param passed_config: caller-supplied settings, chained with the
        values loaded from DEFAULT_CONFIG (presumably taking precedence
        over them; confirm against DictChain)
    :return: an ArchivalRouter holding the routes added by
        add_cdx_api_handler
    """

    defaults = load_yaml_config(DEFAULT_CONFIG)

    config = DictChain(passed_config, defaults)

    collections = config.get('collections', {})

    # only handed to the collections loader; not read again in this function
    static_routes = {}

    # collections based on file system
    if config.get('enable_auto_colls', True):
        colls_loader_cls = config.get('colls_loader_cls', DirectoryCollsLoader)
        dir_loader = colls_loader_cls(config, static_routes)
        collections.update(dir_loader())

    routes = []

    # items() instead of the Python 2-only iteritems(), so the loop also
    # works on Python 3
    for name, value in collections.items():
        route_config = init_route_config(value, config)
        query_handler = init_collection(route_config)

        # the cdx api is on by default for an api-only app
        cdx_api_suffix = route_config.get('enable_cdx_api', True)

        add_cdx_api_handler(name, cdx_api_suffix, routes, query_handler)

    return ArchivalRouter(routes)
Exemplo n.º 2
0
def init(configfile='config.yaml', redis_url=None):
    """Set up logging, config, redis and the bottle app with its routes.

    :param configfile: path of the YAML config file to load
    :param redis_url: redis url; when falsy, the config's 'redis_url'
        entry (passed through expandvars) is used instead
    :return: the first value returned by init_cork()
    """
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s: [%(levelname)s]: %(message)s')
    logging.debug('')

    # set boto log to error
    boto_logger = logging.getLogger('boto')
    if boto_logger:
        boto_logger.setLevel(logging.ERROR)

    settings = load_yaml_config(configfile)

    redis_url = redis_url or expandvars(settings['redis_url'])
    redis_conn = StrictRedis.from_url(redis_url)

    # the resolver is stored in the config itself, under its own key
    resolver = DynRedisResolver(redis_conn,
                                remote_target=settings['remote_target'],
                                proxy_target=settings['proxy_target'])
    settings['redis_warc_resolver'] = resolver

    app = default_app()
    final_app, cork = init_cork(app, redis_conn, settings)

    webrec = WebRec(settings, cork, redis_conn)
    app.install(webrec)

    dispatcher = PywbDispatcher(app)

    init_routes(webrec)
    dispatcher.init_routes()

    return final_app
Exemplo n.º 3
0
def init_app(init_func, load_yaml=True, config_file=None, config=None):
    """Initialize logging, resolve the config and build the wsgi app.

    :param init_func: callable that receives the config and returns the
        router to wrap
    :param load_yaml: when truthy, look for a YAML config file and, if it
        exists, use its contents as the config
    :param config_file: config file path; the PYWB_CONFIG_FILE environment
        variable overrides it and DEFAULT_CONFIG_FILE is the fallback
    :param config: config used as-is when no YAML file gets loaded
    :return: the router wrapped in the config's 'wsgiapp_class'
        (WSGIApp when the config has no such entry)
    """
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s: [%(levelname)s]: %(message)s')
    logging.debug('')

    try:
        if not config:
            config = {}

        if load_yaml:
            # env setting overrides all others
            config_file = (os.environ.get('PYWB_CONFIG_FILE') or
                           config_file or
                           DEFAULT_CONFIG_FILE)

            if os.path.isfile(config_file):
                config = load_yaml_config(config_file)

        wsgiapp_class = config.get('wsgiapp_class', WSGIApp)
        wb_router = init_func(config)
    except:
        # catch-all: whatever failed is logged and re-raised unchanged
        logging.exception('*** pywb app init FAILED config from "%s"!\n',
                          init_func.__name__)
        raise

    # only reached when the init above succeeded
    logging.debug('*** pywb app inited with config from "%s"!\n',
                  init_func.__name__)

    return wsgiapp_class(wb_router)
Exemplo n.º 4
0
def init_app(init_func, load_yaml=True, config_file=None, config=None):
    """Initialize logging, resolve the config and wrap the router in WSGIApp.

    :param init_func: callable that receives the config and returns the
        router to wrap
    :param load_yaml: when truthy, look for a YAML config file and, if it
        exists, use its contents as the config
    :param config_file: config file path; the PYWB_CONFIG_FILE environment
        variable overrides it and DEFAULT_CONFIG_FILE is the fallback
    :param config: config used as-is when no YAML file gets loaded
    :return: WSGIApp instance created from the router
    """
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s: [%(levelname)s]: %(message)s')
    logging.debug('')

    try:
        if not config:
            config = {}

        if load_yaml:
            # env setting overrides all others
            config_file = (os.environ.get('PYWB_CONFIG_FILE') or
                           config_file or
                           DEFAULT_CONFIG_FILE)

            if os.path.isfile(config_file):
                config = load_yaml_config(config_file)

        wb_router = init_func(config)
    except:
        # catch-all: whatever failed is logged and re-raised unchanged
        logging.exception('*** pywb app init FAILED config from "%s"!\n',
                          init_func.__name__)
        raise

    # only reached when the init above succeeded
    logging.debug('*** pywb app inited with config from "%s"!\n',
                  init_func.__name__)

    return WSGIApp(wb_router)
Exemplo n.º 5
0
def create_cdx_server_app(passed_config):
    """
    Build a router exposing only the cdx server api.
    Every collection gets a /<coll>-cdx access point
    which follows the cdx api

    :param passed_config: caller-supplied settings, chained with the
        values loaded from DEFAULT_CONFIG
    :return: an ArchivalRouter holding the routes added by
        add_cdx_api_handler
    """
    config = DictChain(passed_config, load_yaml_config(DEFAULT_CONFIG))

    collections = config.get('collections', {})

    # collections based on file system
    if config.get('enable_auto_colls', True):
        loader_cls = config.get('colls_loader_cls', DirectoryCollsLoader)
        # the loader receives the collections dict itself; its return
        # value is not used here
        load_colls = loader_cls(config, {}, collections)
        load_colls()

    cdx_routes = []

    for coll_name, coll_value in six.iteritems(collections):
        route_config = init_route_config(coll_value, config)
        handler = init_collection(route_config)

        # the cdx api is on by default for an api-only app
        suffix = route_config.get('enable_cdx_api', True)

        add_cdx_api_handler(coll_name, suffix, cdx_routes, handler)

    return ArchivalRouter(cdx_routes)
Exemplo n.º 6
0
def init(configfile='config.yaml', redis_url=None):
    """Create the application from a YAML config and a redis connection.

    :param configfile: path of the YAML config file to load
    :param redis_url: redis url; when falsy, the config's 'redis_url'
        entry (passed through expandvars) is used instead
    :return: the first value returned by init_cork()
    """
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s: [%(levelname)s]: %(message)s')
    logging.debug('')

    # set boto log to error
    boto_logger = logging.getLogger('boto')
    if boto_logger:
        boto_logger.setLevel(logging.ERROR)

    settings = load_yaml_config(configfile)

    redis_url = redis_url or expandvars(settings['redis_url'])
    redis_conn = StrictRedis.from_url(redis_url)

    # the resolver is stored in the config itself, under its own key
    settings['redis_warc_resolver'] = DynRedisResolver(
        redis_conn,
        remote_target=settings['remote_target'],
        proxy_target=settings['proxy_target'],
    )

    app = default_app()
    final_app, cork = init_cork(app, redis_conn, settings)

    webrec = WebRec(settings, cork, redis_conn)
    app.install(webrec)

    dispatcher = PywbDispatcher(app)

    init_routes(webrec)
    dispatcher.init_routes()

    return final_app
Exemplo n.º 7
0
    def __init__(self, coll_name, colls_dir='collections', must_exist=True):
        """Load the default config and set up the collection directories.

        :param coll_name: collection name handed to ``_set_coll_dirs``
        :param colls_dir: stored unchanged as ``self.colls_dir``
        :param must_exist: when truthy, ``_assert_coll_exists`` is called
            after the directories have been set
        """
        self.default_config = load_yaml_config(DEFAULT_CONFIG)
        self.colls_dir = colls_dir
        self._set_coll_dirs(coll_name)

        if not must_exist:
            return

        self._assert_coll_exists()
Exemplo n.º 8
0
def init_manager_for_invite(configfile='config.yaml'):
    """Create a CollsManager backed by the redis instance from the config.

    :param configfile: path of the YAML config file; its 'redis_url'
        entry is expanded with os.path.expandvars
    :return: CollsManager built from the cork object and redis connection;
        its three remaining arguments are passed as None
    """
    settings = load_yaml_config(configfile)

    redis_conn = StrictRedis.from_url(
        os.path.expandvars(settings['redis_url']))

    auth = create_cork(redis_conn, settings)

    return CollsManager(auth, redis_conn, None, None, None)
Exemplo n.º 9
0
def init_manager_for_invite(configfile='config.yaml'):
    """Build the collections manager from the given config file.

    :param configfile: path of the YAML config file; its 'redis_url'
        entry is expanded with os.path.expandvars
    :return: CollsManager built from the cork object and redis connection;
        its three remaining arguments are passed as None
    """
    app_config = load_yaml_config(configfile)
    expanded_url = os.path.expandvars(app_config['redis_url'])
    redis_client = StrictRedis.from_url(expanded_url)
    cork_obj = create_cork(redis_client, app_config)
    return CollsManager(cork_obj, redis_client, None, None, None)
Exemplo n.º 10
0
    def store_new(self, coll, path, mtime):
        """Read a collection's metadata file and remember it in the cache

        :param str coll: The name of the collection the metadata is for
        :param str path: The path to the collections metadata file
        :param float mtime: The current mtime of the collections metadata file
        :return: The collections metadata
        :rtype: dict
        """
        metadata = load_yaml_config(path)
        # the mtime is kept next to the data in the cache entry
        cache_entry = (mtime, metadata)
        self.cache[coll] = cache_entry
        return metadata
Exemplo n.º 11
0
    def store_new(self, coll, path, mtime):
        """Load the metadata file of a collection and cache the result

        :param str coll: The name of the collection the metadata is for
        :param str path: The path to the collections metadata file
        :param float mtime: The current mtime of the collections metadata file
        :return: The collections metadata
        :rtype: dict
        """
        loaded = load_yaml_config(path)
        # cache entry layout: (mtime, metadata)
        self.cache[coll] = mtime, loaded
        return loaded
Exemplo n.º 12
0
    def __init__(self, coll_name, colls_dir='collections', must_exist=True):
        """Validate the collection name and set up its directories.

        :param coll_name: collection name; a non-empty name has to match
            ``COLL_RX``
        :param colls_dir: collections directory, joined onto the current
            working directory
        :param must_exist: when truthy, ``_assert_coll_exists`` is called
            after the directories have been set
        :raises ValueError: if a non-empty ``coll_name`` does not match
            ``COLL_RX``
        """
        self.default_config = load_yaml_config(DEFAULT_CONFIG)

        # an empty name skips validation
        if coll_name:
            if not self.COLL_RX.match(coll_name):
                raise ValueError('Invalid Collection Name: ' + coll_name)

        cwd = os.getcwd()
        self.colls_dir = os.path.join(cwd, colls_dir)

        self._set_coll_dirs(coll_name)

        if not must_exist:
            return

        self._assert_coll_exists()
Exemplo n.º 13
0
    def _load_templates_map(self):
        """Load the default config and prefix its template paths.

        Every entry named in 'html_templates' is rewritten in the config
        itself to be joined onto 'templates_dir'.

        :return: tuple of (the modified config, the 'html_templates' value)
        """
        config = load_yaml_config(DEFAULT_CONFIG)

        base_dir = config['templates_dir']

        # Coll Templates
        template_names = config['html_templates']

        for key in template_names:
            relative_path = config[key]
            config[key] = os.path.join(base_dir, relative_path)

        return config, template_names
Exemplo n.º 14
0
    def __init__(self, coll_name, colls_dir='collections', must_exist=True):
        """Check the collection name, then resolve the collection dirs.

        :param coll_name: collection name; a non-empty name has to match
            ``COLL_RX``
        :param colls_dir: collections directory, joined onto the current
            working directory
        :param must_exist: when truthy, ``_assert_coll_exists`` is called
            after the directories have been set
        :raises ValueError: if a non-empty ``coll_name`` does not match
            ``COLL_RX``
        """
        self.default_config = load_yaml_config(DEFAULT_CONFIG)

        # short-circuits for an empty name, so the regex is not consulted
        name_rejected = coll_name and not self.COLL_RX.match(coll_name)
        if name_rejected:
            raise ValueError('Invalid Collection Name: ' + coll_name)

        self.colls_dir = os.path.join(os.getcwd(), colls_dir)
        self._set_coll_dirs(coll_name)

        if must_exist:
            self._assert_coll_exists()
Exemplo n.º 15
0
    def _load_templates_map(self):
        """Return the default config with its template paths resolved.

        Each name listed under 'html_templates' has its config entry
        replaced by that entry joined onto 'templates_dir'.

        :return: tuple of (the modified config, the 'html_templates' value)
        """
        settings = load_yaml_config(DEFAULT_CONFIG)
        templates_root = settings['templates_dir']

        # Coll Templates
        listed = settings['html_templates']

        for template_key in listed:
            settings[template_key] = os.path.join(templates_root,
                                                  settings[template_key])

        return settings, listed
Exemplo n.º 16
0
    def __init__(self, config_file='./config.yaml', custom_config=None):
        """Assemble the config and register the collection routes.

        The config starts from DEFAULT_CONFIG, is updated with the overlay
        config file and finally with ``custom_config``.

        :param config_file: passed to ``load_overlay_config`` together with
            'PYWB_CONFIG_FILE'; a failure is printed and, if no
            ``custom_config`` was given, ``{'debug': True}`` is used as
            the custom config
        :param custom_config: extra settings applied last; its
            'collections' and 'proxy' sections are updated in place with
            the same-named sections of the config before being applied
        """
        config = load_yaml_config(DEFAULT_CONFIG)

        if config_file:
            try:
                overlay = load_overlay_config('PYWB_CONFIG_FILE',
                                              config_file)
                config.update(overlay)
            except Exception as e:
                if not custom_config:
                    custom_config = {'debug': True}
                print(e)

        if custom_config:
            # for these sections the entries already in config are copied
            # into the custom section before it replaces the config's one
            for section in ('collections', 'proxy'):
                if section in custom_config and section in config:
                    custom_config[section].update(config[section])
            config.update(custom_config)

        debug = config.get('debug', False)
        super(WarcServer, self).__init__(debug=debug)
        self.config = config

        self.root_dir = self.config.get('collections_root', '')
        self.index_paths = self.init_paths('index_paths')
        self.archive_paths = self.init_paths('archive_paths', self.root_dir)
        self.acl_paths = self.init_paths('acl_paths')

        self.default_access = self.config.get('default_access')

        self.rules_file = self.config.get('rules_file', '')

        self.auto_handler = None

        if self.config.get('enable_auto_colls', True):
            self.auto_handler = self.load_auto_colls()

        self.fixed_routes = self.load_colls()

        for name, route in iteritems(self.fixed_routes):
            route_path = '/' + name
            if route == self.auto_handler:
                # a fixed route served by the auto handler gets the
                # collection param with a '*' default
                self.add_route(route_path,
                               route,
                               path_param_name='param.coll',
                               default_value='*')
            else:
                self.add_route(route_path, route)

        if self.auto_handler:
            self.add_route('/<path_param_value>',
                           self.auto_handler,
                           path_param_name='param.coll')
Exemplo n.º 17
0
    def filter_archives(self, archives, patch_archives_index):
        """Keep only the archives listed in the patch archives index.

        :param archives: mapping of archive name to archive
        :param patch_archives_index: path of a YAML file whose
            'webarchive_ids' entry names the archives to keep; when falsy
            nothing is loaded
        :return: dict of the archives whose name is in 'webarchive_ids'
            (empty when no index is given)
        """
        if not patch_archives_index:
            return {}

        index_config = load_yaml_config(patch_archives_index)
        allowed_ids = index_config.get('webarchive_ids', {})

        return {name: archives[name]
                for name in archives.keys()
                if name in allowed_ids}
    def filter_archives(self, archives, patch_archives_index):
        """Select the archives named by the patch archives index file.

        :param archives: mapping of archive name to archive
        :param patch_archives_index: path of a YAML file whose
            'webarchive_ids' entry names the archives to keep; when falsy
            nothing is loaded
        :return: dict of the archives whose name is in 'webarchive_ids'
            (empty when no index is given)
        """
        selected = {}

        if patch_archives_index:
            wanted = load_yaml_config(patch_archives_index)
            wanted = wanted.get('webarchive_ids', {})

            selected.update((key, archives[key])
                            for key in archives.keys()
                            if key in wanted)

        return selected
Exemplo n.º 19
0
    def __init__(self, filename=None):
        """Load the rules and default filters from a YAML rules file.

        :param filename: rules file to load; DEFAULT_RULES_FILE when falsy
        """
        rules_config = load_yaml_config(filename or DEFAULT_RULES_FILE)

        self.rules = []
        for raw_rule in rules_config.get('rules'):
            parsed = self.parse_fuzzy_rule(raw_rule)
            # rules for which parsing yields nothing are left out
            if not parsed:
                continue
            self.rules.append(parsed)

        self.default_filters = rules_config.get('default_filters')

        # (compiled 'match' pattern, 'replace' value) pairs
        self.url_normalize_rx = [
            (re.compile(entry['match']), entry['replace'])
            for entry in self.default_filters['url_normalize']
        ]
Exemplo n.º 20
0
    def filter_archives(self, archives, patch_archives_index):
        """Keep only the archives listed in the patch archives index.

        An exception raised while checking or copying a single archive is
        printed and that archive is skipped.

        :param archives: mapping of archive name to archive
        :param patch_archives_index: path of a YAML file whose
            'webarchive_ids' entry names the archives to keep; when falsy
            nothing is loaded
        :return: dict of the archives whose name is in 'webarchive_ids'
            (empty when no index is given)
        """
        if not patch_archives_index:
            return {}

        index_config = load_yaml_config(patch_archives_index)
        allowed_ids = index_config.get('webarchive_ids', {})

        selected = {}
        # NOTE(review): the original author remarked "weird error remains"
        # here; the per-item guard below prints such errors and carries on
        for name in archives.keys():
            try:
                if name not in allowed_ids:
                    continue
                selected[name] = archives[name]
            except Exception as e:
                print(e)

        return selected
Exemplo n.º 21
0
def init():
    """Read ./config.yaml and set up the module-level ipfs/redis state.

    Sets the globals ``rec_dir``, ``ipfs_api`` and ``redis_cli`` and
    registers IPFSLoader under the 'ipfs' key of LOADERS.
    """
    global rec_dir, ipfs_api, redis_cli

    settings = load_yaml_config('./config.yaml')

    host = settings.get('ipfs_host', 'localhost')
    port = settings.get('ipfs_port', 5001)
    url = settings.get('redis_url')

    rec_dir = settings.get('tmp_rec_dir', '/tmp/rec')
    ipfs_api = Client(host, port)
    redis_cli = StrictRedis.from_url(url)

    LOADERS['ipfs'] = IPFSLoader
Exemplo n.º 22
0
    def _load_templates_map(self):
        """Load the template maps from the default config.

        Every key of 'paths' -> 'template_files' and of
        'paths' -> 'shared_template_files' has its value replaced by the
        top-level config entry of the same name.

        :return: tuple of (collection templates map, shared templates map)
        :raises KeyError: if a listed name has no top-level config entry
        """
        defaults = load_yaml_config(DEFAULT_CONFIG)

        # Coll Templates
        templates = defaults['paths']['template_files']

        # only the keys are needed, and plain iteration works on both
        # Python 2 and 3 (dict.iteritems() is Python 2-only); replacing the
        # value of an existing key does not disturb the iteration
        for name in templates:
            templates[name] = defaults[name]

        # Shared Templates
        shared_templates = defaults['paths']['shared_template_files']

        for name in shared_templates:
            shared_templates[name] = defaults[name]

        return templates, shared_templates
Exemplo n.º 23
0
    def _load_templates_map(self):
        """Load the template maps from the default config.

        Every key of 'paths' -> 'template_files' and of
        'paths' -> 'shared_template_files' has its value replaced by the
        top-level config entry of the same name, joined onto
        'paths' -> 'templates_dir'.

        :return: tuple of (collection templates map, shared templates map)
        :raises KeyError: if a listed name has no top-level config entry
        """
        defaults = load_yaml_config(DEFAULT_CONFIG)

        temp_dir = defaults['paths']['templates_dir']

        # Coll Templates
        templates = defaults['paths']['template_files']

        # only the keys are needed, and plain iteration works on both
        # Python 2 and 3 (dict.iteritems() is Python 2-only); replacing the
        # value of an existing key does not disturb the iteration
        for name in templates:
            templates[name] = os.path.join(temp_dir, defaults[name])

        # Shared Templates
        shared_templates = defaults['paths']['shared_template_files']

        for name in shared_templates:
            shared_templates[name] = os.path.join(temp_dir, defaults[name])

        return templates, shared_templates
Exemplo n.º 24
0
    def _load_templates_map(self):
        """Load the template maps and resolve them against the templates dir.

        Every key of 'paths' -> 'template_files' and of
        'paths' -> 'shared_template_files' has its value replaced by the
        top-level config entry of the same name, joined onto
        'paths' -> 'templates_dir'.

        :return: tuple of (collection templates map, shared templates map)
        """
        defaults = load_yaml_config(DEFAULT_CONFIG)

        temp_dir = defaults['paths']['templates_dir']

        def resolve_in_place(mapping):
            # rewrite each entry to the full path of the same-named default
            for key, _ in six.iteritems(mapping):
                mapping[key] = os.path.join(temp_dir, defaults[key])
            return mapping

        # Coll Templates
        templates = resolve_in_place(defaults['paths']['template_files'])

        # Shared Templates
        shared_templates = resolve_in_place(
            defaults['paths']['shared_template_files'])

        return templates, shared_templates
Exemplo n.º 25
0
    def __init__(self, rule_cls, fieldname, **kwargs):
        """
        A domain specific rules block, inited via config map.
        If config map not specified, it is loaded from default location.

        The rules are represented as a map by domain.
        Each rules configuration will load is own field type
        from the list and given a specified rule_cls.

        :param rule_cls: called as ``rule_cls(url_prefix, rules_def)`` for
            every rules entry that defines ``fieldname``
        :param fieldname: key looked up in each rules entry; entries
            without a value for it are skipped
        :param kwargs: optional ``ds_rules_file`` (rules file to load,
            DEFAULT_RULES_FILE when missing or empty) and
            ``default_rule_config`` (used to add a DEFAULT_KEY rule when
            the file did not provide one)
        """

        self.rules = []

        default_rule_config = kwargs.get('default_rule_config')

        ds_rules_file = kwargs.get('ds_rules_file')

        if not ds_rules_file:
            ds_rules_file = DEFAULT_RULES_FILE

        config = load_yaml_config(ds_rules_file)

        # load rules list or init to empty; a config without a 'rules'
        # entry would otherwise leave None here and break the loop below
        rulesmap = (config.get('rules') if config else None) or []

        def_key_found = False

        # iterate over master rules file
        for value in rulesmap:
            url_prefix = value.get('url_prefix')
            rules_def = value.get(fieldname)
            if not rules_def:
                continue

            if url_prefix == self.DEFAULT_KEY:
                def_key_found = True

            self.rules.append(rule_cls(url_prefix, rules_def))

        # if default_rule_config provided, always init a default ruleset
        if not def_key_found and default_rule_config is not None:
            self.rules.append(rule_cls(self.DEFAULT_KEY, default_rule_config))
Exemplo n.º 26
0
 def load_rules(self, filename):
     """Append the rewrite rules found in a YAML rules file to self.rules.

     :param filename: path of the YAML file; its 'rules' entries are each
         passed to ``parse_rewrite_rule`` and kept when the result is
         truthy
     """
     rules_config = load_yaml_config(filename)
     for raw_rule in rules_config.get('rules'):
         parsed = self.parse_rewrite_rule(raw_rule)
         if not parsed:
             continue
         self.rules.append(parsed)
Exemplo n.º 27
0
def create_wb_router(passed_config=None):
    """Create the wayback router with one route per collection.

    :param passed_config: optional settings dict, chained with the values
        loaded from DEFAULT_CONFIG. When 'enable_http_proxy' is set, the
        proxy views are written into its 'proxy_options' entry.
    :return: an ArchivalRouter, or a ProxyArchivalRouter when
        'enable_http_proxy' is set
    """
    passed_config = passed_config or {}

    defaults = load_yaml_config(DEFAULT_CONFIG)

    config = DictChain(passed_config, defaults)

    routes = []

    port = config.get("port")

    collections = config.get("collections", {})

    static_routes = config.get("static_routes", {})

    # route for the collection named "", appended after all other routes
    root_route = None

    # collections based on file system
    if config.get("enable_auto_colls", True):
        colls_loader_cls = config.get("colls_loader_cls", DirectoryCollsLoader)
        # the loader is handed the static routes and collections dicts;
        # its return value is not used
        dir_loader = colls_loader_cls(config, static_routes, collections)
        dir_loader()

    if config.get("enable_memento", False):
        request_class = MementoRequest
    else:
        request_class = WbRequest

    # store live and replay handlers
    handler_dict = {}

    # setup template globals
    templates_dirs = config["templates_dirs"]
    jinja_env = J2TemplateView.init_shared_env(paths=templates_dirs, packages=config["template_packages"])

    jinja_env.globals.update(config.get("template_globals", {}))

    # items() instead of the Python 2-only iteritems(), so the loops also
    # work on Python 3
    for static_name, static_path in static_routes.items():
        routes.append(Route(static_name, StaticHandler(static_path)))

    for name, value in collections.items():
        # a ready-made handler is routed as-is, with the global config
        if isinstance(value, BaseHandler):
            handler_dict[name] = value
            new_route = Route(name, value, config=config)
            if name != "":
                routes.append(new_route)
            else:
                root_route = new_route
            continue

        route_config = init_route_config(value, config)
        route_class = route_config.get("route_class", Route)

        # '$liveweb' index paths select the live handler
        if route_config.get("index_paths") == "$liveweb":
            live = create_live_handler(route_config)
            handler_dict[name] = live
            new_route = route_class(name, live, config=route_config)
            if name != "":
                routes.append(new_route)
            else:
                root_route = new_route
            continue

        query_handler = init_collection(route_config)

        wb_handler = create_wb_handler(query_handler=query_handler, config=route_config)

        handler_dict[name] = wb_handler

        logging.debug("Adding Collection: " + name)

        new_route = route_class(name, wb_handler, config=route_config, request_class=request_class)

        if name != "":
            routes.append(new_route)
        else:
            root_route = new_route

        # cdx query handler
        cdx_api_suffix = route_config.get("enable_cdx_api", False)

        if cdx_api_suffix:
            add_cdx_api_handler(name, cdx_api_suffix, routes, query_handler, route_class=route_class)

    if config.get("debug_echo_env", False):
        routes.append(Route("echo_env", DebugEchoEnvHandler()))

    if config.get("debug_echo_req", False):
        routes.append(Route("echo_req", DebugEchoHandler()))

    # the root route goes last, after every named route
    if root_route:
        routes.append(root_route)

    # resolve any cross handler references
    for route in routes:
        if hasattr(route.handler, "resolve_refs"):
            route.handler.resolve_refs(handler_dict)

    # default to regular archival mode
    router = ArchivalRouter

    if config.get("enable_http_proxy", False):
        router = ProxyArchivalRouter

        view = init_view(config, "proxy_select_html")

        if "proxy_options" not in passed_config:
            passed_config["proxy_options"] = {}

        if view:
            passed_config["proxy_options"]["proxy_select_view"] = view

        view = init_view(config, "proxy_cert_download_html")

        if view:
            passed_config["proxy_options"]["proxy_cert_download_view"] = view

    # Finally, create wb router
    return router(
        routes,
        port=port,
        abs_path=config.get("absolute_paths", True),
        home_view=init_view(config, "home_html"),
        error_view=init_view(config, "error_html"),
        info_view=init_view(config, "info_json"),
        config=config,
    )
Exemplo n.º 28
0
 def load_yaml_file(self, root_dir, filename):
     """Load a YAML file from below root_dir, or {} when it is no file.

     :param root_dir: directory the filename is joined onto
     :param filename: name of the YAML file
     :return: result of load_yaml_config for an existing file,
         otherwise an empty dict
     """
     full_path = os.path.join(root_dir, filename)
     if not os.path.isfile(full_path):
         return {}
     return load_yaml_config(full_path)
Exemplo n.º 29
0
def create_wb_router(passed_config=None):
    """Create the wayback router with one route per collection.

    :param passed_config: optional settings dict, chained with the values
        loaded from DEFAULT_CONFIG. When 'enable_http_proxy' is set, the
        proxy views are written into its 'proxy_options' entry.
    :return: an ArchivalRouter, or a ProxyArchivalRouter when
        'enable_http_proxy' is set
    """
    # a fresh dict per call: the proxy setup below writes into
    # passed_config, which with a shared `{}` default argument would leak
    # from one call into the next
    if passed_config is None:
        passed_config = {}

    defaults = load_yaml_config(DEFAULT_CONFIG)

    config = DictChain(passed_config, defaults)

    routes = []

    port = config.get('port')

    collections = config.get('collections', {})

    static_routes = config.get('static_routes', {})

    # collections based on file system
    dir_loader = DirectoryCollsLoader(config, static_routes)
    collections.update(dir_loader())

    if config.get('enable_memento', False):
        request_class = MementoRequest
    else:
        request_class = WbRequest

    # store live and replay handlers
    handler_dict = {}

    # setup template globals
    template_globals = config.get('template_globals')
    if template_globals:
        add_env_globals(template_globals)

    # items() instead of the Python 2-only iteritems(), so the loops also
    # work on Python 3
    for name, value in collections.items():
        if isinstance(value, BaseHandler):
            handler_dict[name] = value
            # a ready-made handler has no route config of its own, so the
            # route gets the global config (route_config is not assigned
            # yet at this point of the iteration)
            routes.append(Route(name, value, config=config))
            continue

        route_config = init_route_config(value, config)

        # '$liveweb' index paths select the live handler
        if route_config.get('index_paths') == '$liveweb':
            live = create_live_handler(route_config)
            handler_dict[name] = live
            routes.append(Route(name, live, config=route_config))
            continue

        query_handler = init_collection(route_config)

        wb_handler = create_wb_handler(
            query_handler=query_handler,
            config=route_config,
        )

        handler_dict[name] = wb_handler

        logging.debug('Adding Collection: ' + name)

        route_class = route_config.get('route_class', Route)

        routes.append(
            route_class(name,
                        wb_handler,
                        config=route_config,
                        request_class=request_class))

        # cdx query handler
        cdx_api_suffix = route_config.get('enable_cdx_api', False)

        if cdx_api_suffix:
            add_cdx_api_handler(name, cdx_api_suffix, routes, query_handler)

    if config.get('debug_echo_env', False):
        routes.append(Route('echo_env', DebugEchoEnvHandler()))

    if config.get('debug_echo_req', False):
        routes.append(Route('echo_req', DebugEchoHandler()))

    for static_name, static_path in static_routes.items():
        routes.append(Route(static_name, StaticHandler(static_path)))

    # resolve any cross handler references
    for route in routes:
        if hasattr(route.handler, 'resolve_refs'):
            route.handler.resolve_refs(handler_dict)

    # default to regular archival mode
    router = ArchivalRouter

    if config.get('enable_http_proxy', False):
        router = ProxyArchivalRouter

        view = J2TemplateView.create_template(config.get('proxy_select_html'),
                                              'Proxy Coll Selector')

        if 'proxy_options' not in passed_config:
            passed_config['proxy_options'] = {}

        if view:
            passed_config['proxy_options']['proxy_select_view'] = view

        view = J2TemplateView.create_template(
            config.get('proxy_cert_download_html'), 'Proxy Cert Download')

        if view:
            passed_config['proxy_options']['proxy_cert_download_view'] = view

    # Finally, create wb router
    return router(
        routes,
        port=port,
        abs_path=config.get('absolute_paths', True),
        home_view=J2TemplateView.create_template(config.get('home_html'),
                                                 'Home Page'),
        error_view=J2TemplateView.create_template(config.get('error_html'),
                                                  'Error Page'),
        config=config)
Exemplo n.º 30
0
 def load_all(self):
     """Load and process every YAML file listed by the index file.

     The filenames come from ``load_from_index`` called with
     ``self.base_dir`` and ``self.index_file``; the loaded data of each
     is passed to ``process``, whose result is not used.
     """
     index_entries = self.load_from_index(self.base_dir, self.index_file)
     for yaml_path in index_entries:
         loaded = load_yaml_config(yaml_path)
         self.process(loaded)
Exemplo n.º 31
0
def create_wb_router(passed_config=None):
    """Create the wayback router with one route per collection.

    :param passed_config: optional settings dict, chained with the values
        loaded from DEFAULT_CONFIG. When 'enable_http_proxy' is set, the
        proxy views are written into its 'proxy_options' entry.
    :return: an ArchivalRouter, or a ProxyArchivalRouter when
        'enable_http_proxy' is set
    """
    passed_config = passed_config or {}

    defaults = load_yaml_config(DEFAULT_CONFIG)

    config = DictChain(passed_config, defaults)

    routes = []

    port = config.get('port')

    collections = config.get('collections', {})

    static_routes = config.get('static_routes', {})

    # route for the collection named '', appended after all other routes
    root_route = None

    # collections based on file system
    if config.get('enable_auto_colls', True):
        colls_loader_cls = config.get('colls_loader_cls', DirectoryCollsLoader)
        dir_loader = colls_loader_cls(config, static_routes)
        collections.update(dir_loader())

    if config.get('enable_memento', False):
        request_class = MementoRequest
    else:
        request_class = WbRequest

    # store live and replay handlers
    handler_dict = {}

    # setup template globals
    templates_dirs = config['templates_dirs']
    jinja_env = J2TemplateView.init_shared_env(paths=templates_dirs)
    jinja_env.globals.update(config.get('template_globals', {}))

    # items() instead of the Python 2-only iteritems(), so the loops also
    # work on Python 3
    for static_name, static_path in static_routes.items():
        routes.append(Route(static_name, StaticHandler(static_path)))

    for name, value in collections.items():
        # a ready-made handler is routed as-is, with the global config
        if isinstance(value, BaseHandler):
            handler_dict[name] = value
            new_route = Route(name, value, config=config)
            if name != '':
                routes.append(new_route)
            else:
                root_route = new_route
            continue

        route_config = init_route_config(value, config)
        route_class = route_config.get('route_class', Route)

        # '$liveweb' index paths select the live handler
        if route_config.get('index_paths') == '$liveweb':
            live = create_live_handler(route_config)
            handler_dict[name] = live
            new_route = route_class(name, live, config=route_config)
            if name != '':
                routes.append(new_route)
            else:
                root_route = new_route
            continue

        query_handler = init_collection(route_config)

        wb_handler = create_wb_handler(
            query_handler=query_handler,
            config=route_config,
        )

        handler_dict[name] = wb_handler

        logging.debug('Adding Collection: ' + name)

        new_route = route_class(name, wb_handler,
                                config=route_config,
                                request_class=request_class)

        if name != '':
            routes.append(new_route)
        else:
            root_route = new_route

        # cdx query handler
        cdx_api_suffix = route_config.get('enable_cdx_api', False)

        if cdx_api_suffix:
            add_cdx_api_handler(name, cdx_api_suffix, routes, query_handler,
                                route_class=route_class)

    if config.get('debug_echo_env', False):
        routes.append(Route('echo_env', DebugEchoEnvHandler()))

    if config.get('debug_echo_req', False):
        routes.append(Route('echo_req', DebugEchoHandler()))

    # the root route goes last, after every named route
    if root_route:
        routes.append(root_route)

    # resolve any cross handler references
    for route in routes:
        if hasattr(route.handler, 'resolve_refs'):
            route.handler.resolve_refs(handler_dict)

    # default to regular archival mode
    router = ArchivalRouter

    if config.get('enable_http_proxy', False):
        router = ProxyArchivalRouter

        view = init_view(config, 'proxy_select_html')

        if 'proxy_options' not in passed_config:
            passed_config['proxy_options'] = {}

        if view:
            passed_config['proxy_options']['proxy_select_view'] = view

        view = init_view(config, 'proxy_cert_download_html')

        if view:
            passed_config['proxy_options']['proxy_cert_download_view'] = view

    # Finally, create wb router
    return router(
        routes,
        port=port,
        abs_path=config.get('absolute_paths', True),
        home_view=init_view(config, 'home_html'),
        error_view=init_view(config, 'error_html'),
        config=config
    )
Exemplo n.º 32
0
 def load_from_index(self, base_dir, index_file):
     """List the files matched by the patterns of an index file.

     :param base_dir: directory containing the index file; the patterns
         are resolved relative to it
     :param index_file: name of the YAML index file, whose
         'webarchive_index' entry holds the glob patterns
     :return: list of the paths matched by all patterns, in pattern order
         (empty when there are no patterns)
     """
     config = load_yaml_config(os.path.join(base_dir, index_file))

     # collect the matches of every pattern; returning from inside the
     # loop would stop after the first pattern and yield None for an
     # empty pattern list
     matches = []
     for pattern in config['webarchive_index']:
         full = os.path.join(base_dir, pattern)
         matches.extend(glob.glob(full))

     return matches
Exemplo n.º 33
0
    def __init__(self, config_file='./config.yaml', custom_config=None):
        """Assemble the config and register the collection routes.

        The config starts from DEFAULT_CONFIG, is updated with the overlay
        config file and finally with ``custom_config``.

        :param config_file: passed to ``load_overlay_config`` together with
            'PYWB_CONFIG_FILE'; a failure is printed and, if no
            ``custom_config`` was given, ``{'debug': True}`` is used as
            the custom config
        :param custom_config: extra settings applied last; its
            'collections', 'proxy' and 'recorder' sections are combined
            with the same-named config sections (modifying
            ``custom_config`` in place) before being applied
        :raises Exception: if a dedup policy is configured and the dedup
            index url does not start with "redis://"
        """
        config = load_yaml_config(DEFAULT_CONFIG)

        if config_file:
            try:
                overlay = load_overlay_config('PYWB_CONFIG_FILE', config_file)
                config.update(overlay)
            except Exception as e:
                if not custom_config:
                    custom_config = {'debug': True}
                print(e)

        if custom_config:
            # for these sections the entries already in config are copied
            # into the custom section before it replaces the config's one
            for section in ('collections', 'proxy'):
                if section in custom_config and section in config:
                    custom_config[section].update(config[section])

            if 'recorder' in custom_config and 'recorder' in config:
                # a plain string recorder setting is expanded to a dict
                # with the string as 'source_coll'
                for cfg in (custom_config, config):
                    if isinstance(cfg['recorder'], str):
                        cfg['recorder'] = {'source_coll': cfg['recorder']}

                # here the custom recorder entries win over the config's
                merged_recorder = config['recorder']
                merged_recorder.update(custom_config['recorder'])
                custom_config['recorder'] = merged_recorder

            config.update(custom_config)

        debug = config.get('debug', False)
        super(WarcServer, self).__init__(debug=debug)
        self.config = config

        recorder_config = self.config.get('recorder') or {}
        dedup_enabled = (isinstance(recorder_config, dict) and
                         recorder_config.get('dedup_policy'))
        if not dedup_enabled:
            self.dedup_index_url = None
        else:
            self.dedup_index_url = recorder_config.get('dedup_index_url', WarcServer.DEFAULT_DEDUP_URL)
            if self.dedup_index_url and not self.dedup_index_url.startswith('redis://'):
                raise Exception("The dedup_index_url must start with \"redis://\". Only Redis-based dedup index is supported at this time.")

        self.root_dir = self.config.get('collections_root', '')
        self.index_paths = self.init_paths('index_paths')
        self.archive_paths = self.init_paths('archive_paths', self.root_dir)
        self.acl_paths = self.init_paths('acl_paths')

        self.default_access = self.config.get('default_access')

        self.rules_file = self.config.get('rules_file', '')

        if 'certificates' in self.config:
            certs = self.config['certificates']
            cert_reqs = certs.get('cert_reqs', 'CERT_NONE')
            ca_cert_dir = certs.get('ca_cert_dir')
            # both adapters get the same certificate settings, each with
            # its own adapter and Retry instance
            DefaultAdapters.live_adapter = PywbHttpAdapter(
                max_retries=Retry(3),
                cert_reqs=cert_reqs,
                ca_cert_dir=ca_cert_dir)
            DefaultAdapters.remote_adapter = PywbHttpAdapter(
                max_retries=Retry(3),
                cert_reqs=cert_reqs,
                ca_cert_dir=ca_cert_dir)

        self.auto_handler = None

        if self.config.get('enable_auto_colls', True):
            self.auto_handler = self.load_auto_colls()

        self.fixed_routes = self.load_colls()

        for name, route in iteritems(self.fixed_routes):
            route_path = '/' + name
            if route == self.auto_handler:
                # a fixed route served by the auto handler gets the
                # collection param with a '*' default
                self.add_route(route_path, route,
                               path_param_name='param.coll',
                               default_value='*')
            else:
                self.add_route(route_path, route)

        if self.auto_handler:
            self.add_route('/<path_param_value>', self.auto_handler,
                           path_param_name='param.coll')
Exemplo n.º 34
0
def _place_route(routes, name, new_route, root_route):
    """
    Add new_route to routes unless it is the root ('' name) route.

    Returns the root route to carry forward: new_route itself when name
    is empty, otherwise the root_route that was passed in unchanged.
    """
    if name != '':
        routes.append(new_route)
        return root_route

    return new_route


def create_wb_router(passed_config=None):
    """
    Build the wayback router from the given configuration.

    Values in passed_config take precedence over those loaded from
    DEFAULT_CONFIG. One route is created per static route and per
    collection (plus an optional cdx api route and optional debug
    routes); the collection named '' is treated as the root route and
    is appended after all other routes.

    :param passed_config: optional dict of config overrides. When
        'enable_http_proxy' is set, this dict is updated in place with a
        'proxy_options' entry holding any proxy views that were created.
    :return: an instance of the configured regular router class, or of
        the proxy router class when 'enable_http_proxy' is set.
    """
    passed_config = passed_config or {}

    defaults = load_yaml_config(DEFAULT_CONFIG)

    config = DictChain(passed_config, defaults)

    regular_router_class = config.get("regular_router_class", ArchivalRouter)
    proxy_router_class = config.get("proxy_router_class", ProxyArchivalRouter)
    default_route_class = config.get("route_class", Route)

    routes = []

    port = config.get('port')

    collections = config.get('collections', {})

    static_routes = config.get('static_routes', {})

    root_route = None

    # collections based on file system
    # (the loader receives static_routes and collections; its return
    # value is not used here)
    if config.get('enable_auto_colls', True):
        colls_loader_cls = config.get('colls_loader_cls', DirectoryCollsLoader)
        dir_loader = colls_loader_cls(config, static_routes, collections)
        dir_loader()

    if config.get('enable_memento', False):
        request_class = MementoRequest
    else:
        request_class = config.get("wb_request_class", WbRequest)

    # store live and replay handlers
    handler_dict = {}

    # setup template globals
    templates_dirs = config['templates_dirs']
    jinja_env = J2TemplateView.init_shared_env(
        paths=templates_dirs, packages=config['template_packages'])

    jinja_env.globals.update(config.get('template_globals', {}))

    for static_name, static_path in six.iteritems(static_routes):
        routes.append(
            default_route_class(static_name, StaticHandler(static_path)))

    for name, value in six.iteritems(collections):
        # a ready-made handler is routed as-is, using the global config
        if isinstance(value, BaseHandler):
            handler_dict[name] = value
            new_route = Route(name, value, config=config)
            root_route = _place_route(routes, name, new_route, root_route)
            continue

        route_config = init_route_config(value, config)
        route_class = route_config.get('route_class', default_route_class)

        # live web collection: no index/query handler is created
        if route_config.get('index_paths') == '$liveweb':
            live = create_live_handler(route_config)
            handler_dict[name] = live
            new_route = route_class(name, live, config=route_config)
            root_route = _place_route(routes, name, new_route, root_route)
            continue

        query_handler = init_collection(route_config)

        wb_handler = create_wb_handler(
            query_handler=query_handler,
            config=route_config,
        )

        handler_dict[name] = wb_handler

        logging.debug('Adding Collection: %s', name)

        new_route = route_class(name,
                                wb_handler,
                                config=route_config,
                                request_class=request_class)

        root_route = _place_route(routes, name, new_route, root_route)

        # cdx query handler
        cdx_api_suffix = route_config.get('enable_cdx_api', False)

        if cdx_api_suffix:
            add_cdx_api_handler(name,
                                cdx_api_suffix,
                                routes,
                                query_handler,
                                route_class=route_class)

    if config.get('debug_echo_env', False):
        routes.append(Route('echo_env', DebugEchoEnvHandler()))

    if config.get('debug_echo_req', False):
        routes.append(Route('echo_req', DebugEchoHandler()))

    # the root route goes last, after every named route
    if root_route:
        routes.append(root_route)

    # resolve any cross handler references
    for route in routes:
        if hasattr(route.handler, 'resolve_refs'):
            route.handler.resolve_refs(handler_dict)

    # default to regular archival mode
    router_constructor = regular_router_class

    if config.get('enable_http_proxy', False):
        router_constructor = proxy_router_class

        view = init_view(config, 'proxy_select_html')

        # written to passed_config (not config) so the chained config
        # passed to the router sees the proxy views
        if 'proxy_options' not in passed_config:
            passed_config['proxy_options'] = {}

        if view:
            passed_config['proxy_options']['proxy_select_view'] = view

        view = init_view(config, 'proxy_cert_download_html')

        if view:
            passed_config['proxy_options']['proxy_cert_download_view'] = view

    # log (rather than print) the router class that is actually used
    logging.debug('Using router class: %s', router_constructor)

    # Finally, create wb router
    return router_constructor(routes,
                              port=port,
                              abs_path=config.get('absolute_paths', True),
                              home_view=init_view(config, 'home_html'),
                              error_view=init_view(config, 'error_html'),
                              info_view=init_view(config, 'info_json'),
                              config=config)
Exemplo n.º 35
0
 def load_yaml_file(self, root_dir, filename):
     """
     Load the yaml config stored at filename under root_dir.

     Returns the result of load_yaml_config() for that path, or an
     empty dict when the path is not an existing regular file.
     """
     full_path = os.path.join(root_dir, filename)

     # nothing to load: missing path or not a regular file
     if not os.path.isfile(full_path):
         return {}

     return load_yaml_config(full_path)
Exemplo n.º 36
0
 def store_new(self, coll, path, mtime):
     """
     Load the yaml config at path and cache it for the collection.

     The cache entry for coll is replaced with a (mtime, config) tuple;
     the freshly loaded config is returned.
     """
     entry = (mtime, load_yaml_config(path))
     self.cache[coll] = entry
     return entry[1]