def test_single_file_combined(self):
    """Look up rules from the single ``list1.aclj`` file with a 'block' default.

    Each case queries one URL and checks both the ``urlkey`` and the
    ``access`` value of the rule returned by ``find_access_rule``.
    """
    acl_source = FileAccessIndexSource(TEST_EXCL_PATH + 'list1.aclj')
    checker = AccessChecker(SimpleAggregator({'source': acl_source}),
                            default_access='block')

    # (queried url, expected urlkey, expected access)
    # Rows with an empty urlkey expect the configured default ('block').
    expectations = [
        ('http://example.com/abc/page.html',
         'com,example)/abc/page.html', 'allow'),
        ('http://example.com/abc/page.htm', 'com,example)/abc', 'block'),
        ('http://example.com/abc/', 'com,example)/abc', 'block'),
        ('http://foo.example.com/', 'com,example,', 'exclude'),
        ('http://example.com/', 'com,', 'allow'),
        ('foo.net', '', 'block'),
        ('https://example.net/abc/path/other', '', 'block'),
    ]

    for url, expected_key, expected_access in expectations:
        rule = checker.find_access_rule(url)
        assert rule['urlkey'] == expected_key
        assert rule['access'] == expected_access
def test_excludes_dir(self):
    """Look up rules from every ACL file under ``TEST_EXCL_PATH``.

    The checker is backed by a ``DirectoryAccessSource`` and uses 'block'
    as its default access value.
    """
    checker = AccessChecker(DirectoryAccessSource(TEST_EXCL_PATH),
                            default_access='block')

    # (queried url, expected urlkey, expected access)
    expectations = [
        ('http://example.com/', 'com,example)/', 'allow'),
        ('http://example.bo', 'bo,example)/', 'exclude'),
        ('https://example.com/foo/path', 'com,example)/foo', 'exclude'),
        ('https://example.net/abc/path/other',
         'net,example)/abc/path', 'block'),
        # exact-only match
        ('https://www.iana.org/', 'org,iana)/###', 'allow'),
        ('https://www.iana.org/any/other', 'org,iana)/', 'exclude'),
        ('https://www.iana.org/x', 'org,iana)/', 'exclude'),
    ]

    for url, expected_key, expected_access in expectations:
        rule = checker.find_access_rule(url)
        assert rule['urlkey'] == expected_key
        assert rule['access'] == expected_access
def test_blocks_only(self):
    """Look up rules from ``blocks.aclj`` without passing a default access.

    The final case expects an empty ``urlkey`` together with 'allow' for a
    URL that none of the expected rule keys cover.
    """
    acl_source = FileAccessIndexSource(TEST_EXCL_PATH + 'blocks.aclj')
    checker = AccessChecker(SimpleAggregator({'source': acl_source}))

    # (queried url, expected urlkey, expected access)
    expectations = [
        ('https://example.com/foo', 'com,example)/foo', 'exclude'),
        ('https://example.com/food', 'com,example)/foo', 'exclude'),
        ('https://example.com/foo/path', 'com,example)/foo', 'exclude'),
        ('https://example.net/abc/path', 'net,example)/abc/path', 'block'),
        ('https://example.net/abc/path/other',
         'net,example)/abc/path', 'block'),
        ('https://example.net/fo', '', 'allow'),
    ]

    for url, expected_key, expected_access in expectations:
        rule = checker.find_access_rule(url)
        assert rule['urlkey'] == expected_key
        assert rule['access'] == expected_access
def load_auto_colls(self):
    """Build the resource handler for collections found under ``self.root_dir``.

    When ``self.root_dir`` is not set, a notice is printed and ``None`` is
    returned. Otherwise a ``DefaultResourceHandler`` is returned, reading
    its index from a ``CacheDirectoryIndexSource``; if
    ``self.dedup_index_url`` is set, that directory source is combined with
    a ``RedisMultiKeyIndexSource`` in a ``SimpleAggregator``.
    """
    if not self.root_dir:
        print('No Root Dir, Skip Auto Colls!')
        return None

    dir_source = CacheDirectoryIndexSource(
        base_prefix=self.root_dir,
        base_dir=self.index_paths,
        config=self.config,
    )

    acl_source = CacheDirectoryAccessSource(self.acl_paths)
    access_checker = AccessChecker(acl_source, self.default_access)

    # Default to the plain directory source; wrap it only when a dedup
    # index is configured.
    source = dir_source
    if self.dedup_index_url:
        source = SimpleAggregator({
            'dedup': RedisMultiKeyIndexSource(self.dedup_index_url),
            'dir': dir_source,
        })

    return DefaultResourceHandler(
        source,
        self.archive_paths,
        rules_file=self.rules_file,
        access_checker=access_checker,
    )
def find_match(self, r):
    """Print the acl rule that applies to a url

    The rule is looked up in ``self.acl_file`` for ``r.url`` and ``r.user``.
    A rule with an empty ``urlkey`` is reported as the default rule; any
    other rule is printed via ``self.print_rule``.

    :param argparse.Namespace r: Parsed result from ArgumentParser
    :rtype: None
    """
    checker = AccessChecker(self.acl_file, '<default>')
    matched = checker.find_access_rule(r.url, acl_user=r.user)

    print('Matched rule:')
    print('')

    if matched['urlkey'] != '':
        self.print_rule(matched)
        return

    print(' <No Match, Using Default Rule>')
    print('')
def load_coll(self, name, coll_config):
    """Build the handler for one configured collection.

    :param str name: collection name; used as the key of the index source
        and passed on to ``self.init_sequence`` for sequence collections
    :param coll_config: either a string naming the index, or a dict that
        may provide ``index`` / ``index_paths``, ``archive_paths``,
        ``acl_paths``, ``default_access``, ``surt_ordered``, ``sequence``,
        ``index_group`` and ``timeout``
    :return: ``self.auto_handler`` when ``coll_config`` is ``'$all'`` and an
        auto handler is set; the result of ``self.init_sequence`` for a
        ``sequence`` config; otherwise a ``DefaultResourceHandler``
    :raises Exception: if ``coll_config`` is neither a string nor a dict,
        if a string config is empty, or if a dict config has none of
        index, sequence or index_group
    """
    if coll_config == '$all' and self.auto_handler:
        return self.auto_handler

    if isinstance(coll_config, str):
        index = coll_config
        archive_paths = None
        acl_paths = None
        default_access = self.default_access
        # A plain-string config has no way to override this, so use the
        # same default as the dict form. Without this binding the
        # init_index_agg() call below fails with UnboundLocalError.
        surt_ordered = True
    elif isinstance(coll_config, dict):
        # 'index_paths' is accepted as a fallback when 'index' is
        # missing or empty.
        index = coll_config.get('index') or coll_config.get('index_paths')
        archive_paths = coll_config.get('archive_paths')
        acl_paths = coll_config.get('acl_paths')
        default_access = coll_config.get('default_access',
                                         self.default_access)
        surt_ordered = coll_config.get('surt_ordered', True)
    else:
        raise Exception('collection config must be string or dict')

    # INDEX CONFIG
    if index:
        agg = init_index_agg({name: index}, surt_ordered=surt_ordered)
    else:
        # Only reachable for an empty string or a dict without an index.
        if not isinstance(coll_config, dict):
            raise Exception('collection config missing')

        sequence = coll_config.get('sequence')
        if sequence:
            return self.init_sequence(name, sequence)

        index_group = coll_config.get('index_group')
        if not index_group:
            raise Exception('no index, index_group or sequence found')

        timeout = int(coll_config.get('timeout', 0))
        agg = init_index_agg(index_group, True, timeout,
                             surt_ordered=surt_ordered)

    # ARCHIVE CONFIG
    if not archive_paths:
        archive_paths = self.config.get('archive_paths')

    # ACCESS CONFIG
    # An access checker is only attached when acl paths are configured.
    access_checker = None
    if acl_paths:
        access_checker = AccessChecker(acl_paths, default_access)

    return DefaultResourceHandler(agg, archive_paths,
                                  rules_file=self.rules_file,
                                  access_checker=access_checker)
def load_auto_colls(self):
    """Build the resource handler for collections found under ``self.root_dir``.

    When ``self.root_dir`` is not set, a notice is printed and ``None`` is
    returned. Otherwise a ``DefaultResourceHandler`` is returned that reads
    its index from a ``CacheDirectoryIndexSource`` and checks access with
    an ``AccessChecker`` built from ``self.acl_paths``.
    """
    if not self.root_dir:
        print('No Root Dir, Skip Auto Colls!')
        return None

    index_source = CacheDirectoryIndexSource(
        base_prefix=self.root_dir,
        base_dir=self.index_paths,
        config=self.config,
    )

    acl_source = CacheDirectoryAccessSource(self.acl_paths)
    checker = AccessChecker(acl_source, self.default_access)

    return DefaultResourceHandler(
        index_source,
        self.archive_paths,
        rules_file=self.rules_file,
        access_checker=checker,
    )
def test_allows_only_default_block(self):
    """Look up rules from ``allows.aclj`` with a 'block' default.

    Every case checks the ``urlkey`` of the returned rule. The ``access``
    value is checked only for the URLs expected to come back with an
    empty ``urlkey``, where it must be 'block'.
    """
    acl_source = FileAccessIndexSource(TEST_EXCL_PATH + 'allows.aclj')
    checker = AccessChecker(SimpleAggregator({'source': acl_source}),
                            default_access='block')

    # (queried url, expected urlkey, expected access or None to skip)
    expectations = [
        ('http://example.net', 'net,', None),
        ('http://foo.example.net/abc', 'net,', None),
        ('https://example.net/test/', 'net,example)/test', None),
        ('https://example.org/', '', 'block'),
        ('https://abc.domain.net/path', 'net,domain,', None),
        ('https://domain.neta/path', '', 'block'),
    ]

    for url, expected_key, expected_access in expectations:
        rule = checker.find_access_rule(url)
        assert rule['urlkey'] == expected_key
        if expected_access is not None:
            assert rule['access'] == expected_access
def test_excludes_dir(self):
    """Look up rules from every ACL file under ``TEST_EXCL_PATH``.

    The checker is backed by a ``DirectoryAccessSource`` and uses 'block'
    as its default access value. Besides the prefix cases, this covers
    rule keys ending in ``###``, including one that is the first line of
    its file and one that is the only rule in its file.
    """
    checker = AccessChecker(DirectoryAccessSource(TEST_EXCL_PATH),
                            default_access='block')

    # (queried url, expected urlkey, expected access)
    expectations = [
        ('http://example.com/', 'com,example)/', 'allow'),
        ('http://example.bo', 'bo,example)/', 'exclude'),
        ('https://example.com/foo/path', 'com,example)/foo', 'exclude'),
        ('https://example.net/abc/path/other',
         'net,example)/abc/path', 'block'),
        # exact-only match
        ('https://www.iana.org/', 'org,iana)/###', 'allow'),
        ('https://www.iana.org/any/other', 'org,iana)/', 'exclude'),
        ('https://www.iana.org/x', 'org,iana)/', 'exclude'),
        # exact-only match, first line in *.aclj file
        ('https://www.iana.org/exact/match/first/line/aclj/',
         'org,iana)/exact/match/first/line/aclj###', 'allow'),
        # exact-only match, single rule in *.aclj file
        ('https://www.lonesome-rule.org/', 'org,lonesome-rule)/###', 'allow'),
    ]

    for url, expected_key, expected_access in expectations:
        rule = checker.find_access_rule(url)
        assert rule['urlkey'] == expected_key
        assert rule['access'] == expected_access