def __init__(self, rules, supported_options=None, skip_unsupported_rules=True,
             use_re2='auto', max_mem=256 * 1024 * 1024, rule_cls=AdblockRule):
    """Parse ``rules`` and sort them into the groups stored on the instance.

    :param rules: iterable of rules; items that are not already
        ``rule_cls`` instances are passed to ``rule_cls(...)``.
    :param supported_options: option names treated as supported; when
        ``None``, ``rule_cls.BINARY_OPTIONS + ['domain']`` is used.
    :param skip_unsupported_rules: stored as-is on the instance.
    :param use_re2: ``'auto'`` means "use whatever ``_is_re2_supported()``
        reports"; any other value is stored unchanged in ``uses_re2``.
    :param max_mem: stored as ``re2_max_mem`` and forwarded to
        ``_combined_regex``.
    :param rule_cls: class used to build and recognise rule objects.
    """
    self.supported_options = (
        rule_cls.BINARY_OPTIONS + ['domain']
        if supported_options is None
        else supported_options
    )

    if use_re2 == 'auto':
        self.uses_re2 = _is_re2_supported()
    else:
        self.uses_re2 = use_re2
    self.re2_max_mem = max_mem
    self.rule_cls = rule_cls
    self.skip_unsupported_rules = skip_unsupported_rules

    # Every supported option is switched on when asking a rule whether
    # it can be matched.
    option_flags = {name: True for name in self.supported_options}

    kept = []
    for item in rules:
        rule = item if isinstance(item, rule_cls) else rule_cls(item)
        # A rule with neither a regex nor options is dropped.
        if not (rule.regex or rule.options):
            continue
        if rule.matching_supported(option_flags):
            kept.append(rule)
    self.rules = kept

    def has_options(rule):
        return rule.options

    def requires_domain(rule):
        return 'domain' in rule.options and any(rule.options["domain"].values())

    # "Advanced" rules carry options, "basic" rules do not.
    with_options, without_options = split_data(self.rules, has_options)

    # Rules that require a domain (at least one truthy entry in their
    # 'domain' option) are kept apart from the rest: when the caller
    # supplies a domain, rules requiring a different one can be discarded,
    # so they are indexed by domain via ``_split_bw_domain``. Rules that
    # do not require any domain land in the second group.
    # TODO: what about ~rules? Should we match them earlier?
    need_domain, no_domain = split_data(with_options, requires_domain)

    # Basic rules: split into blacklist / whitelist and pre-build one
    # combined regex for each side.
    self.blacklist, self.whitelist = self._split_bw(without_options)

    re2_enabled = self.uses_re2

    def build_regex(rule_group):
        return _combined_regex(
            [rule.regex for rule in rule_group],
            use_re2=re2_enabled,
            max_mem=max_mem,
        )

    self.blacklist_re = build_regex(self.blacklist)
    self.whitelist_re = build_regex(self.whitelist)

    option_groups = self._split_bw(no_domain)
    self.blacklist_with_options, self.whitelist_with_options = option_groups

    domain_groups = self._split_bw_domain(need_domain)
    self.blacklist_require_domain, self.whitelist_require_domain = domain_groups
def __init__(self, rules, supported_options=None, skip_unsupported_rules=True,
             use_re2='auto', max_mem=256 * 1024 * 1024, rule_cls=AdblockRule):
    """Build the rule collections from ``rules``.

    :param rules: iterable of rules; anything that is not already a
        ``rule_cls`` instance is wrapped with ``rule_cls(...)``.
    :param supported_options: option names considered supported; defaults
        to ``rule_cls.BINARY_OPTIONS + ['domain']`` when ``None``.
    :param skip_unsupported_rules: stored unchanged on the instance.
    :param use_re2: ``'auto'`` defers to ``_is_re2_supported()``; any other
        value is stored unchanged in ``uses_re2``.
    :param max_mem: stored as ``re2_max_mem`` and passed on to
        ``_combined_regex``.
    :param rule_cls: class used to construct and recognise rules.
    """
    self.supported_options = (
        rule_cls.BINARY_OPTIONS + ['domain']
        if supported_options is None
        else supported_options
    )

    if use_re2 == 'auto':
        self.uses_re2 = _is_re2_supported()
    else:
        self.uses_re2 = use_re2
    self.re2_max_mem = max_mem
    self.rule_cls = rule_cls
    self.skip_unsupported_rules = skip_unsupported_rules

    # All supported options are flagged True for the support check.
    enabled = {option: True for option in self.supported_options}

    accepted = []
    for candidate in rules:
        if isinstance(candidate, rule_cls):
            rule = candidate
        else:
            rule = rule_cls(candidate)
        # Only rules that have a regex and can be matched with the
        # supported options are kept.
        if rule.regex and rule.matching_supported(enabled):
            accepted.append(rule)
    self.rules = accepted

    def carries_options(rule):
        return rule.options

    def demands_domain(rule):
        return 'domain' in rule.options and any(rule.options["domain"].values())

    # Rules with options are "advanced"; rules without are "basic".
    advanced, basic = split_data(self.rules, carries_options)

    # Advanced rules that require a domain are handled separately so that,
    # given a domain, rules requiring another one can be skipped; they are
    # indexed by ``_split_bw_domain`` below. Rules that do not require any
    # domain go to the second group.
    # TODO: what about ~rules? Should we match them earlier?
    domain_bound, domain_free = split_data(advanced, demands_domain)

    # Blacklist / whitelist split for basic rules, plus one combined
    # regex per side.
    self.blacklist, self.whitelist = self._split_bw(basic)

    re2_flag = self.uses_re2

    def merged_regex(group):
        patterns = [rule.regex for rule in group]
        return _combined_regex(patterns, use_re2=re2_flag, max_mem=max_mem)

    self.blacklist_re = merged_regex(self.blacklist)
    self.whitelist_re = merged_regex(self.whitelist)

    free_split = self._split_bw(domain_free)
    self.blacklist_with_options, self.whitelist_with_options = free_split

    bound_split = self._split_bw_domain(domain_bound)
    self.blacklist_require_domain, self.whitelist_require_domain = bound_split
def __init__(self, rules, supported_options=None, skip_unsupported_rules=True,
             use_re2='auto', max_mem=256 * 1024 * 1024, rule_cls=AdblockRule):
    """Construct rule objects from ``rules`` and group them for matching.

    :param rules: iterable whose items are each passed to
        ``self.rule_cls(...)``.
    :param supported_options: option names considered supported; when
        ``None``, ``rule_cls.BINARY_OPTIONS + ['domain']`` is used.
    :param skip_unsupported_rules: stored unchanged on the instance.
    :param use_re2: ``'auto'`` defers to ``_is_re2_supported()``; any other
        value is stored unchanged in ``uses_re2``.
    :param max_mem: stored as ``re2_max_mem`` and passed on to
        ``_combined_regex``.
    :param rule_cls: class used to construct rules.
    """
    self.supported_options = (
        rule_cls.BINARY_OPTIONS + ['domain']
        if supported_options is None
        else supported_options
    )

    if use_re2 == 'auto':
        self.uses_re2 = _is_re2_supported()
    else:
        self.uses_re2 = use_re2
    self.re2_max_mem = max_mem
    self.rule_cls = rule_cls
    self.skip_unsupported_rules = skip_unsupported_rules

    # Every supported option is flagged True for the support check.
    all_enabled = {option: True for option in self.supported_options}

    parsed = []
    for raw in rules:
        rule = self.rule_cls(raw)
        # Keep only rules that have a regex and pass the support check.
        if rule.regex and rule.matching_supported(all_enabled):
            parsed.append(rule)
    self.rules = parsed

    def has_options(rule):
        return rule.options

    def has_domain_option(rule):
        return 'domain' in rule.options

    # Rules with options are "advanced"; rules without are "basic".
    advanced, basic = split_data(self.rules, has_options)

    # Rules carrying a 'domain' option are set aside: most of them can be
    # discarded based on domain information, so they are handed to
    # ``_split_bw_domain`` for separate handling.
    with_domain, without_domain = split_data(advanced, has_domain_option)

    # Blacklist / whitelist split for basic rules, plus one combined
    # regex per side.
    self.blacklist, self.whitelist = self._split_bw(basic)

    re2_flag = self.uses_re2

    def union_regex(group):
        patterns = [rule.regex for rule in group]
        return _combined_regex(patterns, use_re2=re2_flag, max_mem=max_mem)

    self.blacklist_re = union_regex(self.blacklist)
    self.whitelist_re = union_regex(self.whitelist)

    adv_split = self._split_bw(without_domain)
    self.blacklist_adv, self.whitelist_adv = adv_split

    domain_split = self._split_bw_domain(with_domain)
    self.blacklist_domains, self.whitelist_domains = domain_split
def _split_bw(cls, rules):
    """Partition ``rules`` by whether each rule is an exception.

    Delegates to ``split_data`` with a predicate that is true for rules
    whose ``is_exception`` attribute is falsy, and returns its result
    unchanged. Callers unpack that result as ``(blacklist, whitelist)``.
    """
    def is_blocking(rule):
        return not rule.is_exception

    return split_data(rules, is_blocking)