def validate(self):
    """Report same-domain links whose ``rel`` attribute asks for nofollow.

    Each link from ``self.get_links()`` is skipped when ``is_valid``
    rejects its ``href``.  A remaining link is collected when its domain
    equals the domain of ``self.review.page_url`` and its ``rel``
    attribute contains the ``nofollow`` token.  If anything was collected,
    one ``invalid.links.nofollow`` violation is added carrying the list of
    hrefs and worth 10 points per collected link.
    """
    links = self.get_links()

    page_domain, domain_url = get_domain_from_url(self.review.page_url)

    rel_nofollow = []

    for link in links:
        href = link.get('href')

        if not is_valid(href):
            continue

        link_domain, link_domain_url = get_domain_from_url(href)
        if page_domain != link_domain:
            continue

        # ``rel`` is a whitespace-separated, case-insensitive token list, so
        # values such as "nofollow noopener" or "NoFollow" must match too.
        # Some parsers expose it as a list of tokens rather than a string;
        # anything with ``split`` is treated as the raw string form.
        rel = link.get('rel') or ''
        tokens = rel.split() if hasattr(rel, 'split') else rel

        if any(token.lower() == 'nofollow' for token in tokens):
            rel_nofollow.append(href)

    if rel_nofollow:
        self.add_violation(
            key='invalid.links.nofollow',
            value=rel_nofollow,
            points=10 * len(rel_nofollow)
        )
Exemplo n.º 2
0
 def normalize_url(self, url):
     """Return a normalized form of ``url``, or ``None`` if it is rejected.

     ``None`` is returned when ``is_valid(url)`` is falsy.  Otherwise a
     URL that ``self.is_absolute`` does not accept is first passed through
     ``self.rebase``, and the result of ``self.url_ends_with_slash`` on
     the (possibly rebased) URL is returned.
     """
     if not is_valid(url):
         return None

     if self.is_absolute(url):
         return self.url_ends_with_slash(url)

     return self.url_ends_with_slash(self.rebase(url))
Exemplo n.º 3
0
 def normalize_url(self, url):
     """Return ``url`` in absolute form, or ``None`` if it is rejected.

     ``None`` is returned when ``is_valid(url)`` is falsy.  A URL that
     ``self.is_absolute`` accepts is returned unchanged; any other URL is
     returned after being passed through ``self.rebase``.
     """
     if not is_valid(url):
         return None

     return url if self.is_absolute(url) else self.rebase(url)
Exemplo n.º 4
0
    def validate(self):
        """Flag the reviewed page URL when its path contains an underscore.

        The URL is taken from ``self.reviewer.page_url`` and parsed with
        ``is_valid``.  When the parsed path contains ``_``, an
        ``invalid.url_word_separator`` violation worth 10 points is added
        with the full URL as its value.  Nothing is reported for a URL
        that ``is_valid`` rejects.
        """
        url = self.reviewer.page_url
        parsed_url = is_valid(url)

        # is_valid() may hand back a falsy value for a URL it rejects;
        # without this guard the ``.path`` lookup raises AttributeError.
        if not parsed_url:
            return

        if '_' in parsed_url.path:
            self.add_violation(key='invalid.url_word_separator',
                               value=url,
                               points=10)
Exemplo n.º 5
0
    def validate(self):
        """Add a violation when the page URL path uses ``_`` as a separator.

        Reads ``self.reviewer.page_url``, parses it with ``is_valid`` and
        checks the parsed path for an underscore.  A match produces one
        ``invalid.url_word_separator`` violation (10 points) whose value
        is the full URL.  A URL rejected by ``is_valid`` produces no
        violation.
        """
        url = self.reviewer.page_url
        parsed_url = is_valid(url)

        # A rejected URL may come back as a falsy value, which has no
        # ``path`` attribute; bail out instead of raising AttributeError.
        if not parsed_url:
            return

        if '_' not in parsed_url.path:
            return

        self.add_violation(
            key='invalid.url_word_separator',
            value=url,
            points=10
        )
    def get_canonical_urls(self):
        """Build the ``www`` and non-``www`` variants of the page URL.

        The URL comes from ``self.reviewer.page_url`` and is parsed with
        ``is_valid``.  Whichever variant matches the page's own host is
        the page URL with trailing slashes stripped; the other variant is
        rebuilt as ``scheme://host`` with the ``www.`` prefix added or
        removed.

        Returns a dict with the keys ``www_url`` and ``no_www_url``.
        """
        page_url = self.reviewer.page_url
        parts = is_valid(page_url)
        scheme, host = parts.scheme, parts.netloc
        own_variant = page_url.rstrip('/')

        if host.startswith('www.'):
            # Drop the four characters of the "www." prefix.
            return {
                'www_url': own_variant,
                'no_www_url': '{}://{}'.format(scheme, host[4:]),
            }

        return {
            'www_url': '{}://www.{}'.format(scheme, host),
            'no_www_url': own_variant,
        }
    def validate(self):
        """Add a violation when the page head has no canonical link.

        Unless ``self.config.FORCE_CANONICAL`` is truthy, a non-empty
        ``self.page_url`` is only checked when ``is_valid`` accepts it and
        it has a query string; otherwise the method returns early.  When
        ``self.get_head()`` yields a non-empty result containing no item
        with ``rel == 'canonical'``, an ``absent.meta.canonical``
        violation worth 30 points is added.
        """
        if not self.config.FORCE_CANONICAL:
            page_url = self.page_url
            # Only pages with query string parameters
            if page_url and not (is_valid(page_url) and
                                 urlparse(page_url).query):
                return

        head = self.get_head()
        if not head:
            return

        has_canonical = any(item.get('rel') == 'canonical' for item in head)
        if not has_canonical:
            self.add_violation(
                key='absent.meta.canonical',
                value=None,
                points=30
            )
Exemplo n.º 8
0
    def validate(self):
        """Report links that point at blacklisted domains.

        The blacklist is read from the ``blacklist.domains`` violation
        preference.  Every link from ``self.get_links()`` whose ``href``
        passes ``is_valid`` and whose domain is in the blacklist is
        collected.  If any were found, a single ``blacklist.domains``
        violation is added with the list of hrefs, worth 100 points per
        link.
        """
        blacklisted = self.get_violation_pref('blacklist.domains')

        offending = []
        for anchor in self.get_links():
            target = anchor.get('href')
            if not is_valid(target):
                continue

            # get_domain_from_url returns a pair; only the domain is used.
            if get_domain_from_url(target)[0] in blacklisted:
                offending.append(target)

        if not offending:
            return

        self.add_violation(
            key='blacklist.domains',
            value=offending,
            points=100 * len(offending)
        )
Exemplo n.º 9
0
    def validate(self):
        """Add a violation when the page head lacks a canonical link.

        The ``absent.meta.canonical`` violation preference decides whether
        every page is checked.  When it is falsy and ``self.page_url`` is
        non-empty, the page is only checked if ``is_valid`` accepts the
        URL and the URL has a query string.  A non-empty ``get_head()``
        result with no item whose ``rel`` equals ``'canonical'`` produces
        one ``absent.meta.canonical`` violation worth 30 points.
        """
        always_required = self.get_violation_pref('absent.meta.canonical')

        if not always_required and self.page_url:
            # Only pages with query string parameters
            if not is_valid(self.page_url):
                return
            if not urlparse(self.page_url).query:
                return

        head = self.get_head()
        if not head:
            return

        rel_values = [item.get('rel') for item in head]
        if 'canonical' not in rel_values:
            self.add_violation(key='absent.meta.canonical',
                               value=None,
                               points=30)
Exemplo n.º 10
0
    def validate(self):
        """Add a violation listing links to blacklisted domains.

        Reads the ``blacklist.domains`` violation preference, then keeps
        the ``href`` of each link from ``self.get_links()`` that passes
        ``is_valid`` and whose domain appears in that preference.  A
        non-empty result is reported as one ``blacklist.domains``
        violation worth 100 points per kept link.
        """
        blocked = self.get_violation_pref('blacklist.domains')

        # Lazy on purpose: each href is validated and resolved one at a
        # time, in link order.
        hrefs = (link.get('href') for link in self.get_links())
        matches = [
            href for href in hrefs
            if is_valid(href) and get_domain_from_url(href)[0] in blocked
        ]

        if matches:
            self.add_violation(
                key='blacklist.domains',
                value=matches,
                points=100 * len(matches)
            )
Exemplo n.º 11
0
    def validate(self):
        """Report links to the page's own domain that are marked nofollow.

        Links come from ``self.get_links()``; those whose ``href`` fails
        ``is_valid`` are ignored.  A link is collected when its domain
        matches the domain of ``self.review.page_url`` and ``nofollow`` is
        one of the tokens in its ``rel`` attribute.  A non-empty
        collection yields one ``invalid.links.nofollow`` violation with
        the hrefs as value and 10 points per link.
        """
        links = self.get_links()

        page_domain, domain_url = get_domain_from_url(self.review.page_url)

        rel_nofollow = []

        for link in links:
            href = link.get('href')

            if not is_valid(href):
                continue

            link_domain, link_domain_url = get_domain_from_url(href)

            # ``rel`` is a whitespace-separated, case-insensitive list of
            # tokens ("nofollow noopener", "NoFollow", ...), and some
            # parsers return it already split into a list.  Comparing the
            # raw value with == 'nofollow' misses all of those.
            rel = link.get('rel') or ''
            tokens = rel.split() if hasattr(rel, 'split') else rel
            is_nofollow = any(token.lower() == 'nofollow' for token in tokens)

            if is_nofollow and page_domain == link_domain:
                rel_nofollow.append(href)

        if rel_nofollow:
            self.add_violation(key='invalid.links.nofollow',
                               value=rel_nofollow,
                               points=10 * len(rel_nofollow))