Exemplo n.º 1
0
    def build_url_filter(self, spec):
        """Build the link filter described by ``spec`` and store it.

        The resulting callable is assigned to ``self.url_filterf``; nothing
        is returned. It takes a link object exposing ``nofollow`` and ``url``
        attributes and returns a truthy value when the link should be
        followed.

        Keys read from ``spec``:
          * ``links_to_follow`` -- ``"none"`` rejects every link, ``"all"``
            accepts every link, any other value selects pattern filtering.
          * ``respect_nofollow`` -- defaults to True; when true, links
            flagged ``nofollow`` are rejected.
          * ``follow_patterns`` / ``exclude_patterns`` -- handed to
            ``include_exclude_filter`` in pattern mode.
        """
        respect_nofollow = spec.get('respect_nofollow', True)
        links_to_follow = spec.get("links_to_follow")

        if links_to_follow == "none":
            url_filterf = lambda x: False
        elif links_to_follow == "all":
            if respect_nofollow:
                # Respecting nofollow means rejecting flagged links, the same
                # polarity the pattern branch below uses. The previous
                # ``lambda x: x.nofollow`` accepted only nofollow links.
                url_filterf = lambda x: not x.nofollow
            else:
                url_filterf = lambda x: True
        else:  # patterns
            patterns = spec.get('follow_patterns')
            excludes = spec.get('exclude_patterns')
            pattern_fn = include_exclude_filter(patterns, excludes)

            if respect_nofollow:
                url_filterf = lambda x: not x.nofollow and pattern_fn(x.url)
            else:
                url_filterf = lambda x: pattern_fn(x.url)

        self.url_filterf = url_filterf
Exemplo n.º 2
0
    def build_url_filter(self, spec):
        """Build the link filter described by ``spec`` and store it.

        The resulting callable is assigned to ``self.url_filterf``; nothing
        is returned. It takes a link object exposing ``nofollow`` and ``url``
        attributes and returns a truthy value when the link should be
        followed.

        Keys read from ``spec``:
          * ``links_to_follow`` -- ``"none"`` rejects every link, ``"all"``
            accepts every link, any other value selects pattern filtering.
          * ``respect_nofollow`` -- defaults to True; when true, links
            flagged ``nofollow`` are rejected.
          * ``follow_patterns`` / ``exclude_patterns`` -- handed to
            ``include_exclude_filter`` in pattern mode.
        """
        respect_nofollow = spec.get('respect_nofollow', True)
        links_to_follow = spec.get("links_to_follow")

        if links_to_follow == "none":
            url_filterf = lambda x: False
        elif links_to_follow == "all":
            if respect_nofollow:
                # Respecting nofollow means rejecting flagged links, the same
                # polarity the pattern branch below uses. The previous
                # ``lambda x: x.nofollow`` accepted only nofollow links.
                url_filterf = lambda x: not x.nofollow
            else:
                url_filterf = lambda x: True
        else:  # patterns
            patterns = spec.get('follow_patterns')
            excludes = spec.get('exclude_patterns')
            pattern_fn = include_exclude_filter(patterns, excludes)

            if respect_nofollow:
                url_filterf = lambda x: not x.nofollow and pattern_fn(x.url)
            else:
                url_filterf = lambda x: pattern_fn(x.url)

        self.url_filterf = url_filterf
Exemplo n.º 3
0
 def _build_js_url_filter(self, spec):
     if not self.js_enabled:
         return lambda x: None
     enable_patterns = spec.get('js_enable_patterns')
     disable_patterns = spec.get('js_disable_patterns')
     return include_exclude_filter(enable_patterns, disable_patterns)
Exemplo n.º 4
0
 def _build_js_url_filter(self, spec):
     if not self.js_enabled:
         return lambda x: None
     enable_patterns = spec.get('js_enable_patterns')
     disable_patterns = spec.get('js_disable_patterns')
     return include_exclude_filter(enable_patterns, disable_patterns)