def build_url_filter(self, spec):
    """Build the link filter described by *spec* and store it on the instance.

    The resulting callable is assigned to ``self.url_filterf``; nothing is
    returned. It takes a link object exposing ``nofollow`` and ``url``
    attributes and returns a truthy value when the link should be followed.

    Keys read from *spec*:

    * ``respect_nofollow`` (default ``True``): when truthy, links flagged
      ``nofollow`` are rejected.
    * ``links_to_follow``: ``"none"`` rejects every link, ``"all"`` accepts
      every link (subject to ``respect_nofollow``); any other value selects
      pattern-based filtering.
    * ``follow_patterns`` / ``exclude_patterns``: passed to
      ``include_exclude_filter`` in pattern mode.
    """
    respect_nofollow = spec.get('respect_nofollow', True)
    links_to_follow = spec.get("links_to_follow")

    if links_to_follow == "none":
        url_filterf = lambda x: False
    elif links_to_follow == "all":
        if respect_nofollow:
            # Follow everything *except* nofollow links. The previous
            # predicate (``x.nofollow``) was inverted: it accepted only
            # nofollow links, contradicting the pattern branch below.
            url_filterf = lambda x: not x.nofollow
        else:
            url_filterf = lambda x: True
    else:  # patterns
        patterns = spec.get('follow_patterns')
        excludes = spec.get('exclude_patterns')
        # NOTE(review): include_exclude_filter is defined elsewhere;
        # presumably it returns a predicate over URL strings -- confirm.
        pattern_fn = include_exclude_filter(patterns, excludes)
        if respect_nofollow:
            url_filterf = lambda x: not x.nofollow and pattern_fn(x.url)
        else:
            url_filterf = lambda x: pattern_fn(x.url)

    self.url_filterf = url_filterf
def _build_js_url_filter(self, spec): if not self.js_enabled: return lambda x: None enable_patterns = spec.get('js_enable_patterns') disable_patterns = spec.get('js_disable_patterns') return include_exclude_filter(enable_patterns, disable_patterns)