def test_validate_page_with_invalid_url(self):
        config = Config()

        page = PageFactory.create(url='http://[globo.com/1?item=test')

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=config,
            validators=[]
        )

        content = '<html><head></head></html>'

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = LinkWithRelCanonicalValidator(reviewer)
        validator.add_violation = Mock()
        validator.review.data = {'page.head': [{}]}

        validator.validate()

        expect(validator.add_violation.called).to_be_false()
    def test_can_validate_image_requests_on_globo_html(self):
        page = PageFactory.create()

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            validators=[],
            cache=self.sync_cache
        )

        content = self.get_file('globo.html')

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        reviewer.violation_definitions = {
            'single.size.img': {'default_value': 6},
            'total.requests.img': {'default_value': 50},
            'total.size.img': {'default_value': 100},
        }

        validator = ImageRequestsValidator(reviewer)
        validator.add_violation = Mock()
        validator.review.data = {
            'page.images': [
                (
                    'some_image.jpg',
                    Mock(status_code=200, text=self.get_file('2x2.png'))
                ) for i in range(60)
            ],
            'total.size.img': 106,
        }

        validator.validate()

        expect(validator.add_violation.call_args_list).to_include(
            call(
                key='total.requests.img',
                value={'total': 60, 'limit': 10},
                points=50
            ))

        expect(validator.add_violation.call_args_list).to_include(
            call(
                key='single.size.img',
                value={
                    'limit': 6,
                    'over_max_size': set([('some_image.jpg', 6.57421875)])
                },
                points=0.57421875
            ))
示例#3
0
    def test_can_validate_page_with_metatag_description_too_long(self):
        page = PageFactory.create()
        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            validators=[],
            cache=self.sync_cache
        )

        reviewer.violation_definitions = {
            'page.metatags.description_too_big': {'default_value': 300},
        }

        validator = MetaTagsValidator(reviewer)

        validator.add_violation = Mock()
        validator.review.data['meta.tags'] = [
            {'content': 'X' * 301, 'property': 'name', 'key': 'description'},
        ]
        validator.validate()
        validator.add_violation.assert_called_once_with(
            key='page.metatags.description_too_big',
            value={'max_size': 300},
            points=20
        )

        validator.add_violation = Mock()
        validator.review.data['meta.tags'] = [
            {'content': 'X' * 300, 'property': 'name', 'key': 'description'},
        ]
        validator.validate()
        expect(validator.add_violation.called).to_be_false()
示例#4
0
    def test_handle_sitemap_url_loaded(self):
        page = PageFactory.create(url="http://g1.globo.com/")

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            validators=[]
        )
        reviewer.enqueue = Mock()

        content = self.get_file('url_sitemap.xml')
        response = Mock(status_code=200, text=content)

        facter = SitemapFacter(reviewer)
        facter.async_get = Mock()

        facter.get_facts()

        facter.handle_sitemap_loaded("http://g1.globo.com/sitemap.xml", response)

        expect(facter.review.data['sitemap.files.size']["http://g1.globo.com/sitemap.xml"]).to_equal(0.296875)
        expect(facter.review.data['sitemap.urls']["http://g1.globo.com/sitemap.xml"]).to_equal(set(['http://domain.com/1.html', 'http://domain.com/2.html']))
        expect(facter.review.facts['total.size.sitemap']['value']).to_equal(0.296875)
        expect(facter.review.facts['total.size.sitemap.gzipped']['value']).to_equal(0.1494140625)
        expect(facter.review.data['total.size.sitemap']).to_equal(0.296875)
        expect(facter.review.data['total.size.sitemap.gzipped']).to_equal(0.1494140625)
        expect(facter.review.data['sitemap.files.urls']["http://g1.globo.com/sitemap.xml"]).to_equal(2)
        expect(facter.review.facts['total.sitemap.urls']['value']).to_equal(2)
示例#5
0
    def test_query_string_without_params(self):
        config = Config()
        config.FORCE_CANONICAL = False

        page = PageFactory.create()

        reviewer = Reviewer(api_url='http://localhost:2368',
                            page_uuid=page.uuid,
                            page_url=page.url,
                            page_score=0.0,
                            config=config,
                            validators=[])

        content = '<html><head></head></html>'

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = LinkWithRelCanonicalValidator(reviewer)
        validator.add_violation = Mock()
        validator.review.data = {'page.head': [{}]}

        validator.validate()

        expect(validator.add_violation.called).to_be_false()
示例#6
0
    def test_can_validate(self):
        page = PageFactory.create()

        reviewer = Reviewer(
            api_url="http://localhost:2368",
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            validators=[],
        )

        content = '<html><meta charset="UTF-8"></html>'

        result = {"url": page.url, "status": 200, "content": content, "html": lxml.html.fromstring(content)}
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = OpenGraphValidator(reviewer)
        validator.add_violation = Mock()
        validator.review.data = {"meta.tags": [{"key": "meta", "content": "utf-8", "property": ""}]}

        validator.validate()

        expect(validator.add_violation.call_args_list).to_include(
            call(points=200, key="absent.metatags.open_graph", value=["og:title", "og:type", "og:image", "og:url"])
        )
示例#7
0
    def test_no_title_tag(self):
        page = PageFactory.create()

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[]
        )

        content = '<html></html>'

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer._wait_for_async_requests = Mock()
        reviewer.save_review = Mock()
        reviewer.content_loaded(page.url, Mock(status_code=200, text=content, headers={}))

        facter = TitleFacter(reviewer)
        facter.add_fact = Mock()

        facter.get_facts()

        expect(facter.add_fact.called).to_be_false()
        expect(facter.review.data).to_be_like({})
示例#8
0
    def test_can_get_fact_definitions(self):
        page = PageFactory.create()

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[]
        )

        content = self.get_file('globo.html')

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer._wait_for_async_requests = Mock()
        reviewer.save_review = Mock()
        response = Mock(status_code=200, text=content, headers={})
        reviewer.content_loaded(page.url, response)

        facter = TitleFacter(reviewer)

        definitions = facter.get_fact_definitions()

        expect(definitions).to_length(1)
        expect('page.title' in definitions).to_be_true()
示例#9
0
    def test_can_validate_no_title_tag(self):
        page = PageFactory.create()

        reviewer = Reviewer(api_url='http://localhost:2368',
                            page_uuid=page.uuid,
                            page_url=page.url,
                            page_score=0.0,
                            config=Config(),
                            validators=[])

        content = '<html></html>'

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = TitleValidator(reviewer)

        validator.add_violation = Mock()

        validator.validate()

        validator.add_violation.assert_called_once_with(
            key='page.title.not_found', value=page.url, points=50)
    def test_validator(self):

        config = Config()

        page = PageFactory.create()

        reviewer = Reviewer(
            api_url="http://localhost:2368",
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=config,
            validators=[],
        )

        url = "http://my-site.com/test.html"

        content = '<html><a href="%s" rel="nofollow">Test</a></html>' % url

        result = {"url": page.url, "status": 200, "content": content, "html": lxml.html.fromstring(content)}
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = LinkWithRelNofollowValidator(reviewer)
        validator.add_violation = Mock()

        validator.review.data = {"page.all_links": [{"href": url, "rel": "nofollow"}]}

        validator.validate()

        validator.add_violation.assert_called_once_with(
            key="invalid.links.nofollow", value=["http://my-site.com/test.html"], points=10
        )
示例#11
0
    def test_can_validate_no_title_tag(self):
        page = PageFactory.create()

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            validators=[]
        )

        content = '<html></html>'

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = TitleValidator(reviewer)

        validator.add_violation = Mock()

        validator.validate()

        validator.add_violation.assert_called_once_with(
            key='page.title.not_found',
            value=page.url,
            points=50)
示例#12
0
    def test_javascript_link(self):
        page = PageFactory.create()

        reviewer = Reviewer(
            api_url="http://*****:*****@'+OAS_listpos+'!'+pos+'?'+OAS_query+'\" TARGET='+OAS_target+'>');</script>"

        result = {"url": page.url, "status": 200, "content": content, "html": lxml.html.fromstring(content)}
        reviewer.responses[page.url] = result
        reviewer._wait_for_async_requests = Mock()
        reviewer.save_review = Mock()
        response = Mock(status_code=200, text=content, headers={})
        reviewer.content_loaded(page.url, response)

        facter = LinkFacter(reviewer)
        facter.add_fact = Mock()

        facter.async_get = Mock()
        facter.get_facts()

        expect(facter.add_fact.call_args_list).to_include(call(key="page.links", value=set([])))

        expect(facter.add_fact.call_args_list).to_include(call(key="total.number.links", value=0))
示例#13
0
    def _start_reviewer(self, job):
        if job:

            if count_url_levels(job['url']) > self.config.MAX_URL_LEVELS:
                self.info('Max URL levels! Details: %s' % job['url'])
                return

            self.debug('Starting Review for [%s]' % job['url'])
            reviewer = Reviewer(
                api_url=self.config.HOLMES_API_URL,
                page_uuid=job['page'],
                page_url=job['url'],
                page_score=0,
                config=self.config,
                validators=self.validators,
                facters=self.facters,
                search_provider=self.search_provider,
                async_get=self.async_get,
                wait=self.otto.wait,
                wait_timeout=0,  # max time to wait for all requests to finish
                db=self.db,
                cache=self.cache,
                publish=self.publish,
                girl=self.girl,
                fact_definitions=self.fact_definitions,
                violation_definitions=self.violation_definitions)

            reviewer.review()
示例#14
0
    def _start_reviewer(self, job):
        if job:

            if count_url_levels(job['url']) > self.config.MAX_URL_LEVELS:
                self.info('Max URL levels! Details: %s' % job['url'])
                return

            self.debug('Starting Review for [%s]' % job['url'])
            reviewer = Reviewer(
                api_url=self.config.HOLMES_API_URL,
                page_uuid=job['page'],
                page_url=job['url'],
                page_score=job['score'],
                increase_lambda_tax_method=self._increase_lambda_tax,
                config=self.config,
                validators=self.validators,
                facters=self.facters,
                async_get=self.async_get,
                wait=self.otto.wait,
                wait_timeout=0,  # max time to wait for all requests to finish
                db=self.db,
                cache=self.cache,
                publish=self.publish,
                fact_definitions=self.fact_definitions,
                violation_definitions=self.violation_definitions
            )

            reviewer.review()
示例#15
0
    def test_can_validate_css_requests_on_globo_html(self):
        page = PageFactory.create()

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            validators=[],
            cache=self.sync_cache
        )

        reviewer.violation_definitions = {
            'total.requests.css': {'default_value': 1},
            'total.size.css': {'default_value': 0.0},
        }

        content = self.get_file('globo.html')

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = CSSRequestsValidator(reviewer)
        css = {
            'url': 'some_style.css',
            'status': 200,
            'content': '#id{display:none}',
            'html': None
        }
        validator.get_response = Mock(return_value=css)

        validator.add_violation = Mock()

        validator.review.data = {
            'total.requests.css': 7,
            'total.size.css.gzipped': 0.05
        }

        validator.validate()

        expect(validator.add_violation.call_args_list).to_include(
            call(
                key='total.requests.css',
                value={'over_limit': 6, 'total_css_files': 7},
                points=30
            ))

        expect(validator.add_violation.call_args_list).to_include(
            call(
                key='total.size.css',
                value=0.05,
                points=0
            ))
示例#16
0
    def test_can_validate_without_meta_tags(self):
        config = Config()

        page = PageFactory.create()

        reviewer = Reviewer(api_url='http://localhost:2368',
                            page_uuid=page.uuid,
                            page_url=page.url,
                            page_score=0.0,
                            config=config,
                            validators=[])

        content = '<html></html>'

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = OpenGraphValidator(reviewer)

        validator.add_violation = Mock()

        validator.validate()

        expect(validator.add_violation.called).to_be_false()
    def test_no_body_tag(self):
        config = Config()

        page = PageFactory.create()

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=config,
            validators=[]
        )

        content = '<html></html>'

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = SchemaOrgItemTypeValidator(reviewer)
        validator.add_violation = Mock()

        validator.validate()

        expect(validator.add_violation.called).to_be_false()
示例#18
0
    def test_validate(self):
        config = Config()

        page = PageFactory.create(url='http://globo.com/1?item=test')

        reviewer = Reviewer(api_url='http://localhost:2368',
                            page_uuid=page.uuid,
                            page_url=page.url,
                            page_score=0.0,
                            config=config,
                            validators=[])

        content = '<html><head></head></html>'

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = LinkWithRelCanonicalValidator(reviewer)
        validator.add_violation = Mock()
        validator.review.data = {'page.head': [{}]}

        validator.validate()

        expect(validator.add_violation.call_args_list).to_include(
            call(key='absent.meta.canonical', value=None, points=30))
    def test_force_canonical(self):
        page = PageFactory.create()

        reviewer = Reviewer(
            api_url="http://localhost:2368",
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            validators=[],
            cache=self.sync_cache,
        )

        reviewer.violation_definitions = {"absent.meta.canonical": {"default_value": True}}

        content = "<html><head></head></html>"

        result = {"url": page.url, "status": 200, "content": content, "html": lxml.html.fromstring(content)}
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = LinkWithRelCanonicalValidator(reviewer)
        validator.add_violation = Mock()
        validator.review.data = {"page.head": [{}]}

        validator.validate()

        expect(validator.add_violation.call_args_list).to_include(
            call(key="absent.meta.canonical", value=None, points=30)
        )
    def test_query_string_without_params(self):
        config = Config()
        config.FORCE_CANONICAL = False

        page = PageFactory.create()

        reviewer = Reviewer(
            api_url="http://localhost:2368",
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=config,
            validators=[],
        )

        content = "<html><head></head></html>"

        result = {"url": page.url, "status": 200, "content": content, "html": lxml.html.fromstring(content)}
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = LinkWithRelCanonicalValidator(reviewer)
        validator.add_violation = Mock()
        validator.review.data = {"page.head": [{}]}

        validator.validate()

        expect(validator.add_violation.called).to_be_false()
示例#21
0
    def test_handle_sitemap_url_loaded(self):
        page = PageFactory.create(url="http://g1.globo.com/")

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            validators=[]
        )
        reviewer.enqueue = Mock()

        content = self.get_file('url_sitemap.xml')
        response = Mock(status_code=200, text=content)

        facter = SitemapFacter(reviewer)
        facter.async_get = Mock()

        facter.get_facts()

        facter.handle_sitemap_loaded("http://g1.globo.com/sitemap.xml", response)

        expect(facter.review.data['sitemap.files.size']["http://g1.globo.com/sitemap.xml"]).to_equal(0.296875)
        expect(facter.review.data['sitemap.urls']["http://g1.globo.com/sitemap.xml"]).to_equal(set(['http://domain.com/1.html', 'http://domain.com/2.html']))
        expect(facter.review.facts['total.size.sitemap']['value']).to_equal(0.296875)
        expect(facter.review.facts['total.size.sitemap.gzipped']['value']).to_equal(0.1494140625)
        expect(facter.review.data['total.size.sitemap']).to_equal(0.296875)
        expect(facter.review.data['total.size.sitemap.gzipped']).to_equal(0.1494140625)
        expect(facter.review.data['sitemap.files.urls']["http://g1.globo.com/sitemap.xml"]).to_equal(2)
        expect(facter.review.facts['total.sitemap.urls']['value']).to_equal(2)
    def test_can_get_violation_definitions(self):
        page = PageFactory.create()

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            validators=[]
        )

        reviewer.violation_definitions = {
            'total.size.js': {'default_value': 0.03},
            'total.requests.js': {'default_value': 1},
        }

        content = self.get_file('globo.html')

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = JSRequestsValidator(reviewer)

        definitions = validator.get_violation_definitions()

        expect(definitions).to_length(2)
        expect('total.size.js' in definitions).to_be_true()
        expect('total.requests.js' in definitions).to_be_true()
示例#23
0
    def test_can_validate_page_without_meta_tags(self):
        config = Config()

        page = PageFactory.create()

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=config,
            validators=[]
        )

        content = '<html></html>'

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = MetaTagsValidator(reviewer)
        validator.add_fact = Mock()
        validator.add_violation = Mock()
        validator.validate()

        validator.add_violation.assert_called_once_with(
            key='absent.metatags',
            value='No metatags.',
            points=100
        )
示例#24
0
    def test_handle_url_loaded(self):
        page = PageFactory.create()

        reviewer = Reviewer(
            api_url="http://localhost:2368",
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        content = self.get_file("globo.html")

        result = {"url": page.url, "status": 200, "content": content, "html": lxml.html.fromstring(content)}
        reviewer.responses[page.url] = result
        reviewer._wait_for_async_requests = Mock()
        reviewer.save_review = Mock()
        response = Mock(status_code=200, text=content, headers={})
        reviewer.content_loaded(page.url, response)

        facter = LinkFacter(reviewer)
        facter.async_get = Mock()
        facter.get_facts()

        facter.handle_url_loaded(page.url, response)

        expect(facter.review.data).to_include("page.links")
        data = set([(page.url, response)])
        expect(facter.review.data["page.links"]).to_equal(data)
示例#25
0
    def test_validate(self):
        config = Config()

        page = PageFactory.create()

        reviewer = Reviewer(api_url='http://localhost:2368',
                            page_uuid=page.uuid,
                            page_url=page.url,
                            page_score=0.0,
                            config=config,
                            validators=[])

        content = '<html><body></body></html>'

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = SchemaOrgItemTypeValidator(reviewer)
        validator.add_violation = Mock()
        validator.review.data = {'page.body': [{}]}

        validator.validate()

        expect(validator.add_violation.call_args_list).to_include(
            call(key='absent.schema.itemscope', value=None, points=10))

        expect(validator.add_violation.call_args_list).to_include(
            call(key='absent.schema.itemtype', value=None, points=10))
示例#26
0
    def test_can_validate_css_requests_empty_html(self):
        page = PageFactory.create()

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            validators=[],
            cache=self.sync_cache
        )

        reviewer.violation_definitions = {
            'total.requests.css': {'default_value': 1},
            'total.size.css': {'default_value': 0.0},
        }

        result = {
            'url': page.url,
            'status': 200,
            'content': None,
            'html': None
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = CSSRequestsValidator(reviewer)

        validator.add_violation = Mock()

        validator.validate()

        expect(validator.add_violation.called).to_be_false()
示例#27
0
    def test_can_validate_without_meta_tags(self):
        config = Config()

        page = PageFactory.create()

        reviewer = Reviewer(
            api_url="http://localhost:2368",
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=config,
            validators=[],
        )

        content = "<html></html>"

        result = {"url": page.url, "status": 200, "content": content, "html": lxml.html.fromstring(content)}
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = OpenGraphValidator(reviewer)

        validator.add_violation = Mock()

        validator.validate()

        expect(validator.add_violation.called).to_be_false()
    def test_can_validate_js_requests_empty_html(self):
        config = Config()

        page = PageFactory.create()

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=config,
            validators=[]
        )

        result = {
            'url': page.url,
            'status': 200,
            'content': None,
            'html': None
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = JSRequestsValidator(reviewer)

        validator.add_violation = Mock()

        validator.validate()

        expect(validator.add_violation.called).to_be_false()
示例#29
0
    def test_link_looks_like_image(self):
        page = PageFactory.create(url="http://globo.com/")

        reviewer = Reviewer(
            api_url="http://localhost:2368",
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        content = '<html><a href="http://globo.com/metal.png">Metal</a></html>'

        result = {"url": page.url, "status": 200, "content": content, "html": lxml.html.fromstring(content)}
        reviewer.responses[page.url] = result
        reviewer._wait_for_async_requests = Mock()
        reviewer.save_review = Mock()
        response = Mock(status_code=200, text=content, headers={})
        reviewer.content_loaded(page.url, response)

        facter = LinkFacter(reviewer)
        facter.add_fact = Mock()

        facter.async_get = Mock()
        facter.get_facts()

        expect(facter.add_fact.call_args_list).to_include(call(key="page.links", value=set([])))

        expect(facter.add_fact.call_args_list).to_include(call(key="total.number.links", value=0))

        expect(facter.async_get.called).to_be_false()
示例#30
0
    def test_can_validate_title_size_with_domain(self):
        self.db.query(Key).delete()
        self.db.query(KeysCategory).delete()

        config = Config()
        config.MAX_TITLE_SIZE = 70

        key = Key(name='page.title.size')
        domain = DomainFactory.create(name='globo.com', url='http://globo.com')
        page = PageFactory.create(domain=domain, url='http://globo.com/a.html')

        self.sync_cache.redis.delete('violations-prefs-%s' % domain.name)

        DomainsViolationsPrefsFactory.create(
            domain=domain,
            key=key,
            value='10'
        )

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=config,
            validators=[],
            cache=self.sync_cache
        )

        title = 'a' * 80
        content = '<html><title>%s</title></html>' % title

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        reviewer.violation_definitions = {
            'page.title.size': {'default_value': 70, 'key': key},
        }

        validator = TitleValidator(reviewer)
        validator.add_violation = Mock()
        validator.review.data = {
            'page.title_count': 1,
            'page.title': title
        }

        validator.validate()

        validator.add_violation.assert_called_once_with(
            key='page.title.size',
            value={'max_size': 10, 'page_url': page.url},
            points=10
        )
    def test_validator(self):

        config = Config()

        page = PageFactory.create()

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=config,
            validators=[]
        )

        url1 = 'http://globo.com/b.html'
        url2 = 'http://globo.com/a.html'

        content = '<html><a href="%s">Test</a><a href="%s">Test</a></html>' % (
            url1, url2)

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = LinkWithRedirectValidator(reviewer)

        validator.add_violation = Mock()

        status_307 = Mock(status_code=307, text='Temporary Redirect')
        status_302 = Mock(status_code=302, text='Found')

        validator.review.data = {
            'page.links': [
                (url1, status_307),
                (url2, status_302)
            ]
        }

        validator.validate()

        expect(validator.add_violation.call_args_list).to_include(
            call(
                key='link.redirect.307',
                value=307,
                points=10
            ))

        expect(validator.add_violation.call_args_list).to_include(
            call(
                key='link.redirect.302',
                value=302,
                points=10
            ))
示例#32
0
    def test_can_validate_title_size_with_domain(self):
        self.db.query(Key).delete()
        self.db.query(KeysCategory).delete()

        config = Config()
        config.MAX_TITLE_SIZE = 70

        key = Key(name='page.title.size')
        domain = DomainFactory.create(name='globo.com', url='http://globo.com')
        page = PageFactory.create(domain=domain, url='http://globo.com/a.html')

        self.sync_cache.redis.delete('violations-prefs-%s' % domain.name)

        DomainsViolationsPrefsFactory.create(domain=domain,
                                             key=key,
                                             value='10')

        reviewer = Reviewer(api_url='http://localhost:2368',
                            page_uuid=page.uuid,
                            page_url=page.url,
                            page_score=0.0,
                            config=config,
                            validators=[],
                            cache=self.sync_cache)

        title = 'a' * 80
        content = '<html><title>%s</title></html>' % title

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        reviewer.violation_definitions = {
            'page.title.size': {
                'default_value': 70,
                'key': key
            },
        }

        validator = TitleValidator(reviewer)
        validator.add_violation = Mock()
        validator.review.data = {'page.title_count': 1, 'page.title': title}

        validator.validate()

        validator.add_violation.assert_called_once_with(key='page.title.size',
                                                        value={
                                                            'max_size': 10,
                                                            'page_url':
                                                            page.url
                                                        },
                                                        points=10)
示例#33
0
    def test_can_get_facts(self):
        page = PageFactory.create()

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[]
        )

        content = self.get_file('globo.html')

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer._wait_for_async_requests = Mock()
        reviewer.save_review = Mock()
        response = Mock(status_code=200, text=content, headers={})
        reviewer.content_loaded(page.url, response)

        facter = MetaTagsFacter(reviewer)
        facter.add_fact = Mock()

        facter.get_facts()

        values = [{'content': 'utf-8', 'property': None, 'key': 'charset'},
                  {'content': 'text/html;charset=UTF-8', 'property': 'http-equiv', 'key': 'Content-Type'},
                  {'content': 'BKmmuVQac1JM6sKlj3IoXQvffyIRJvJfbicMouA2a88', 'property': 'name', 'key': 'google-site-verification'},
                  {'content': 'width=device-width, initial-scale=1.0, maximum-scale=1.0', 'property': 'name', 'key': 'viewport'},
                  {'content': u'globo.com - Absolutamente tudo sobre not\xedcias, esportes e entretenimento', 'property': 'property', 'key': 'og:title'},
                  {'content': 'website', 'property': 'property', 'key': 'og:type'},
                  {'content': 'http://www.globo.com/', 'property': 'property', 'key': 'og:url'},
                  {'content': 'http://s.glbimg.com/en/ho/static/globocom2012/img/gcom_marca_og.jpg', 'property': 'property', 'key': 'og:image'},
                  {'content': 'globo.com', 'property': 'property', 'key': 'og:site_name'},
                  {'content': u'S\xf3 na globo.com voc\xea encontra tudo sobre o conte\xfado e marcas das Organiza\xe7\xf5es Globo. O melhor acervo de v\xeddeos online sobre entretenimento, esportes e jornalismo do Brasil.', 'property': 'property', 'key': 'og:description'},
                  {'content': '224969370851736', 'property': 'property', 'key': 'fb:page_id'},
                  {'content': u'S\xf3 na globo.com voc\xea encontra tudo sobre o conte\xfado e marcas das Organiza\xe7\xf5es Globo. O melhor acervo de v\xeddeos online sobre entretenimento, esportes e jornalismo do Brasil.', 'property': 'name', 'key': 'description'},
                  {'content': u'Not\xedcias, Entretenimento, Esporte, Tecnologia, Portal, Conte\xfado, Rede Globo, TV Globo, V\xeddeos, Televis\xe3o', 'property': 'name', 'key': 'keywords'},
                  {'content': 'Globo.com', 'property': 'name', 'key': 'application-name'},
                  {'content': '#0669DE', 'property': 'name', 'key': 'msapplication-TileColor'},
                  {'content': 'http://s.glbimg.com/en/ho/static/globocom2012/img/globo-win-tile.png', 'property': 'name', 'key': 'msapplication-TileImage'}]

        expect(facter.add_fact.call_args_list).to_include(
            call(
                key='meta.tags',
                value=values,
            ))

        expect(facter.review.data).to_length(1)
        expect(facter.review.data).to_include('meta.tags')
        expect(facter.review.data).to_be_like({'meta.tags': values})
    def test_can_validate_heading_hierarchy(self):
        page = PageFactory.create()

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            validators=[],
            cache=self.sync_cache
        )

        reviewer.violation_definitions = {
            'page.heading_hierarchy.size': {'default_value': 150},
        }

        content = self.get_file('globo.html')

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)
        validator = HeadingHierarchyValidator(reviewer)

        # expecting no call of add_violation method
        validator.add_violation = Mock()
        validator.review.data = {
            'page.heading_hierarchy': [
                ('h1', 'Loren ipsum dolor sit amet'),
            ]
        }
        validator.validate()
        expect(validator.add_violation.called).to_be_false()

        # expecting calling add_violation for `page.heading_hierarchy.size`
        validator.add_violation = Mock()
        hh_list = [
            ('h1', 'Loren ipsum dolor sit amet' * 10),
            ('h1', 'Loren ipsum dolor sit amet' * 10),
        ]
        validator.review.data = {'page.heading_hierarchy': hh_list}
        validator.validate()
        expect(validator.add_violation.called).to_be_true()
        validator.add_violation.assert_called_once_with(
            key='page.heading_hierarchy.size',
            value={
                'max_size': 150, 'hh_list': hh_list,
            },
            points=40
        )
示例#35
0
    def test_can_validate_og_title(self):
        page = PageFactory.create()

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            validators=[]
        )

        content = '<html>' \
            '<meta property="og:title" content="Metal" />' \
            '<meta property="og:type" content="video.movie" />' \
            '<meta property="og:url" content="http://a.com" />' \
            '<meta property="og:image" content="http://a.com/rock.png" />' \
            '</html>'

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = OpenGraphValidator(reviewer)
        validator.add_violation = Mock()
        validator.review.data = {
            'meta.tags': [
                {
                    'key': 'og:title',
                    'content': 'Metal',
                    'property': 'property'
                }, {
                    'key': 'og:type',
                    'content': 'video.movie',
                    'property': 'property'
                }, {
                    'key': 'og:url',
                    'content': 'http://a.com',
                    'property': 'property'
                }, {
                    'key': 'og:image',
                    'content': 'http://a.com/rock.png',
                    'property': 'property'
                }
            ],
        }

        validator.validate()

        expect(validator.add_violation.called).to_be_false()
    def test_can_validate_heading_hierarchy(self):
        page = PageFactory.create()

        reviewer = Reviewer(api_url='http://localhost:2368',
                            page_uuid=page.uuid,
                            page_url=page.url,
                            page_score=0.0,
                            config=Config(),
                            validators=[],
                            cache=self.sync_cache)

        reviewer.violation_definitions = {
            'page.heading_hierarchy.size': {
                'default_value': 150
            },
        }

        content = self.get_file('globo.html')

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)
        validator = HeadingHierarchyValidator(reviewer)

        # expecting no call of add_violation method
        validator.add_violation = Mock()
        validator.review.data = {
            'page.heading_hierarchy': [
                ('h1', 'Loren ipsum dolor sit amet'),
            ]
        }
        validator.validate()
        expect(validator.add_violation.called).to_be_false()

        # expecting calling add_violation for `page.heading_hierarchy.size`
        validator.add_violation = Mock()
        hh_list = [
            ('h1', 'Loren ipsum dolor sit amet' * 10),
            ('h1', 'Loren ipsum dolor sit amet' * 10),
        ]
        validator.review.data = {'page.heading_hierarchy': hh_list}
        validator.validate()
        expect(validator.add_violation.called).to_be_true()
        validator.add_violation.assert_called_once_with(
            key='page.heading_hierarchy.size',
            value={
                'max_size': 150,
                'hh_list': hh_list,
            },
            points=40)
示例#37
0
    def test_invalid_link(self):
        page = PageFactory.create()

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[]
        )

        content = '<html><a href="http://]http://www.globo.com/malhacao">blah</a></html>'

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer._wait_for_async_requests = Mock()
        reviewer.save_review = Mock()
        response = Mock(status_code=200, text=content, headers={})
        reviewer.content_loaded(page.url, response)

        facter = LinkFacter(reviewer)
        facter.add_fact = Mock()

        facter.async_get = Mock()
        facter.get_facts()

        expect(facter.add_fact.call_args_list).to_include(
            call(
                key='page.links',
                value=set([])
            ))

        expect(facter.add_fact.call_args_list).to_include(
            call(
                key='total.number.links',
                value=0
            ))

        expect(facter.add_fact.call_args_list).to_include(
            call(
                key='total.number.invalid_links',
                value=1
            ))

        expect(facter.add_fact.call_args_list).to_include(
            call(
                key='page.invalid_links',
                value=set(['http://]http://www.globo.com/malhacao'])
            ))
示例#38
0
    def test_no_get_url_that_exceed_max_url_level(self):
        page = PageFactory.create(url='http://m.com/')

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[]
        )

        content = (
            '<html>'
            '<a href="http://m.com/test/">test</a>'
            '<a href="http://m.com/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/">m</a>'
            '<a href="http://m.com/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/">m</a>'
            '</html>'
        )

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer._wait_for_async_requests = Mock()
        reviewer.save_review = Mock()
        response = Mock(status_code=200, text=content, headers={})
        reviewer.content_loaded(page.url, response)

        facter = LinkFacter(reviewer)
        facter.add_fact = Mock()

        facter.get_facts()

        expect(facter.add_fact.call_args_list).to_equal([
            call(
                key='page.links',
                value=set([])
            ),
            call(
                key='total.number.links',
                value=2
            ),
            call(
                key='total.number.invalid_links',
                value=0
            ),
            call(
                key='page.invalid_links',
                value=set([])
            )])
    def test_can_get_violation_definitions(self):
        config = Config()
        config.MAX_JS_REQUESTS_PER_PAGE = 1
        config.MAX_JS_KB_PER_PAGE_AFTER_GZIP = 0.03

        page = PageFactory.create()

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=config,
            validators=[]
        )

        content = self.get_file('globo.html')

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = JSRequestsValidator(reviewer)

        definitions = validator.get_violation_definitions()

        expect(definitions).to_length(2)
        expect('total.size.js' in definitions).to_be_true()
        expect('total.requests.js' in definitions).to_be_true()

        total_size_message = validator.get_total_size_message(0.03)
        requests_js_message = validator.get_requests_js_message({
            'total_js_files': 7,
            'over_limit': 6
        })

        expect(total_size_message).to_equal(
            'There\'s 0.03kb of JavaScript in this page and that adds '
            'up to more download time slowing down the page rendering '
            'to the user.'
        )

        expect(requests_js_message).to_equal(
            'This page has 7 JavaScript request (6 over limit). Having too '
            'many requests impose a tax in the browser due to handshakes.'
        )
示例#40
0
    def test_can_validate_og_title(self):
        page = PageFactory.create()

        reviewer = Reviewer(api_url='http://localhost:2368',
                            page_uuid=page.uuid,
                            page_url=page.url,
                            page_score=0.0,
                            config=Config(),
                            validators=[])

        content = '<html>' \
            '<meta property="og:title" content="Metal" />' \
            '<meta property="og:type" content="video.movie" />' \
            '<meta property="og:url" content="http://a.com" />' \
            '<meta property="og:image" content="http://a.com/rock.png" />' \
            '</html>'

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        reviewer.get_response = Mock(return_value=result)

        validator = OpenGraphValidator(reviewer)
        validator.add_violation = Mock()
        validator.review.data = {
            'meta.tags': [{
                'key': 'og:title',
                'content': 'Metal',
                'property': 'property'
            }, {
                'key': 'og:type',
                'content': 'video.movie',
                'property': 'property'
            }, {
                'key': 'og:url',
                'content': 'http://a.com',
                'property': 'property'
            }, {
                'key': 'og:image',
                'content': 'http://a.com/rock.png',
                'property': 'property'
            }],
        }

        validator.validate()

        expect(validator.add_violation.called).to_be_false()
示例#41
0
    def test_will_call_reviewer_add_fact(self):
        page = PageFactory.create()

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            validators=[]
        )
        reviewer.add_fact = Mock()

        validator = Validator(reviewer)
        validator.add_fact('random.fact', 'value')
        reviewer.add_fact.assert_called_once_with('random.fact', 'value')
    def test_can_get_default_violations_values(self):
        config = Config()
        config.MAX_HEADING_HIEARARCHY_SIZE = 150

        page = PageFactory.create()

        reviewer = Reviewer(api_url='http://localhost:2368',
                            page_uuid=page.uuid,
                            page_url=page.url,
                            page_score=0.0,
                            config=config,
                            validators=[])

        validator = HeadingHierarchyValidator(reviewer)

        violations_values = validator.get_default_violations_values(config)

        expect(violations_values).to_include('page.heading_hierarchy.size')

        expect(violations_values['page.heading_hierarchy.size']).to_length(2)

        expect(violations_values['page.heading_hierarchy.size']).to_equal({
            'value':
            config.MAX_HEADING_HIEARARCHY_SIZE,
            'description':
            config.get_description('MAX_HEADING_HIEARARCHY_SIZE')
        })
示例#43
0
    def test_add_violation_when_sitemap_is_too_large(self):
        page = PageFactory.create(url='http://globo.com')

        reviewer = Reviewer(api_url='http://localhost:2368',
                            page_uuid=page.uuid,
                            page_url=page.url,
                            page_score=0.0,
                            config=Config(),
                            validators=[])

        validator = SitemapValidator(reviewer)
        validator.review.data['sitemap.files.size'] = {
            'http://g1.globo.com/sitemap.xml': 10241
        }
        validator.review.data['sitemap.data'] = {
            'http://g1.globo.com/sitemap.xml': Mock(status_code=200,
                                                    text='data')
        }
        validator.review.data['sitemap.files.urls'] = {
            'http://g1.globo.com/sitemap.xml': 10
        }
        validator.review.data['sitemap.urls'] = {
            'http://g1.globo.com/sitemap.xml': []
        }
        validator.add_violation = Mock()

        validator.validate()

        validator.add_violation.assert_called_once_with(
            key='total.size.sitemap',
            value={
                'url': 'http://g1.globo.com/sitemap.xml',
                'size': 10.0009765625
            },
            points=10)
示例#44
0
    def test_add_violation_when_404(self):
        page = PageFactory.create(url='http://globo.com')

        reviewer = Reviewer(api_url='http://localhost:2368',
                            page_uuid=page.uuid,
                            page_url=page.url,
                            page_score=0.0,
                            config=Config(),
                            validators=[])

        validator = SitemapValidator(reviewer)
        validator.review.data['sitemap.files.size'] = {
            'http://g1.globo.com/sitemap.xml': 10
        }
        validator.review.data['sitemap.data'] = {
            'http://g1.globo.com/sitemap.xml': Mock(status_code=404, text=None)
        }
        validator.add_violation = Mock()

        validator.validate()

        validator.add_violation.assert_called_once_with(
            key='sitemap.not_found',
            value='http://g1.globo.com/sitemap.xml',
            points=100)
示例#45
0
    def test_add_violation_when_sitemap_with_good_link(self):
        page = PageFactory.create(url='http://globo.com')

        reviewer = Reviewer(api_url='http://localhost:2368',
                            page_uuid=page.uuid,
                            page_url=page.url,
                            page_score=0.0,
                            config=Config(),
                            validators=[])

        validator = SitemapValidator(reviewer)
        validator.review.data['sitemap.files.size'] = {
            'http://g1.globo.com/sitemap.xml': 10
        }
        validator.review.data['sitemap.data'] = {
            'http://g1.globo.com/sitemap.xml':
            Mock(status_code=200,
                 text='data',
                 url='http://g1.globo.com/%C3%BCmlat.php&amp;q=name')
        }
        validator.review.data['sitemap.files.urls'] = {
            'http://g1.globo.com/sitemap.xml': 20
        }
        validator.review.data['sitemap.urls'] = {
            'http://g1.globo.com/sitemap.xml':
            ['http://g1.globo.com/%C3%BCmlat.php&amp;q=name']
        }
        validator.add_violation = Mock()
        validator.flush = Mock()

        validator.validate()

        expect(validator.add_violation.call_count).to_equal(0)
        expect(validator.flush.call_count).to_equal(1)
示例#46
0
    def test_add_violation_when_sitemap_has_links_that_not_need_to_be_encoded(
            self):
        page = PageFactory.create(url='http://globo.com')

        reviewer = Reviewer(api_url='http://localhost:2368',
                            page_uuid=page.uuid,
                            page_url=page.url,
                            page_score=0.0,
                            config=Config(),
                            validators=[])

        validator = SitemapValidator(reviewer)
        validator.review.data['sitemap.files.size'] = {
            'http://g1.globo.com/sitemap.xml': 10
        }
        validator.review.data['sitemap.data'] = {
            'http://g1.globo.com/sitemap.xml': Mock(status_code=200,
                                                    text='data')
        }
        validator.review.data['sitemap.files.urls'] = {
            'http://g1.globo.com/sitemap.xml': 20
        }
        validator.review.data['sitemap.urls'] = {
            'http://g1.globo.com/sitemap.xml': ['http://g1.globo.com/1.html']
        }
        validator.add_violation = Mock()

        validator.validate()

        expect(validator.add_violation.call_count).to_equal(0)
示例#47
0
    def test_can_validate_with_headers(self):
        page = PageFactory.create()

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            validators=[]
        )

        validator = LastModifiedValidator(reviewer)
        validator.add_violation = Mock()

        validator.review.data = {
            'page.last_modified': datetime.datetime(2014, 1, 13, 1, 16, 10)
        }

        validator.review.facts = {
            'page.last_modified': datetime.datetime(2014, 1, 13, 1, 16, 10)
        }

        validator.validate()

        expect(validator.add_violation.called).to_be_false()
示例#48
0
    def test_validate(self):
        page = PageFactory.create(url='http://globo.com/')

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            validators=[]
        )

        validator = RobotsValidator(reviewer)

        response = Mock(status_code=200, text='key:value')

        validator.review.data['robots.response'] = response
        validator.add_violation = Mock()

        validator.validate()

        expect(validator.add_violation.call_args_list).to_include(
            call(
                key='robots.sitemap.not_found',
                value=None,
                points=100
            ))

        expect(validator.add_violation.call_args_list).to_include(
            call(
                key='robots.disallow.not_found',
                value=None,
                points=100
            ))
    def test_can_get_default_violations_values(self):
        config = Config()
        config.REQUIRED_META_TAGS = ['description']

        page = PageFactory.create()

        reviewer = Reviewer(api_url='http://localhost:2368',
                            page_uuid=page.uuid,
                            page_url=page.url,
                            page_score=0.0,
                            config=config,
                            validators=[])

        validator = RequiredMetaTagsValidator(reviewer)

        violations_values = validator.get_default_violations_values(config)

        expect(violations_values).to_include('absent.meta.tags')

        expect(violations_values['absent.meta.tags']).to_length(2)

        expect(violations_values['absent.meta.tags']).to_be_like({
            'value':
            config.REQUIRED_META_TAGS,
            'description':
            config.get_description('REQUIRED_META_TAGS')
        })
示例#50
0
    def test_can_get_default_violations_values(self):
        config = Config()
        config.SCHEMA_ORG_ITEMTYPE = [
            'http://schema.org/WebPage',
            'http://schema.org/AboutPage',
        ]

        page = PageFactory.create()

        reviewer = Reviewer(api_url='http://localhost:2368',
                            page_uuid=page.uuid,
                            page_url=page.url,
                            page_score=0.0,
                            config=config,
                            validators=[])

        validator = SchemaOrgItemTypeValidator(reviewer)

        violations_values = validator.get_default_violations_values(config)

        expect(violations_values).to_include('invalid.schema.itemtype')

        expect(violations_values['invalid.schema.itemtype']).to_length(2)

        expect(violations_values['invalid.schema.itemtype']).to_equal({
            'value':
            config.SCHEMA_ORG_ITEMTYPE,
            'description':
            config.get_description('SCHEMA_ORG_ITEMTYPE')
        })
示例#51
0
    def test_can_get_default_violations_values(self):
        config = Config()
        config.MAX_TITLE_SIZE = 70

        page = PageFactory.create()

        reviewer = Reviewer(api_url='http://localhost:2368',
                            page_uuid=page.uuid,
                            page_url=page.url,
                            page_score=0.0,
                            config=config,
                            validators=[])

        validator = TitleValidator(reviewer)

        violations_values = validator.get_default_violations_values(config)

        expect(violations_values).to_include('page.title.size')

        expect(violations_values['page.title.size']).to_length(2)

        expect(violations_values['page.title.size']).to_be_like({
            'value':
            config.MAX_TITLE_SIZE,
            'description':
            config.get_description('MAX_TITLE_SIZE')
        })
示例#52
0
    def test_can_get_default_violations_values(self):
        config = Config()
        config.FORCE_CANONICAL = False

        page = PageFactory.create()

        reviewer = Reviewer(api_url='http://localhost:2368',
                            page_uuid=page.uuid,
                            page_url=page.url,
                            page_score=0.0,
                            config=config,
                            validators=[])

        validator = LinkWithRelCanonicalValidator(reviewer)

        violations_values = validator.get_default_violations_values(config)

        expect(violations_values).to_include('absent.meta.canonical')

        expect(violations_values['absent.meta.canonical']).to_length(2)

        expect(violations_values['absent.meta.canonical']).to_be_like({
            'value':
            config.FORCE_CANONICAL,
            'description':
            config.get_description('FORCE_CANONICAL')
        })
示例#53
0
    def test_can_get_default_violations_values(self):
        config = Config()
        config.METATAG_DESCRIPTION_MAX_SIZE = 300

        page = PageFactory.create()

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=config,
            validators=[]
        )

        validator = MetaTagsValidator(reviewer)

        violations_values = validator.get_default_violations_values(config)

        expect(violations_values).to_include('page.metatags.description_too_big')

        expect(violations_values['page.metatags.description_too_big']).to_length(2)

        expect(violations_values['page.metatags.description_too_big']).to_be_like({
            'value': config.METATAG_DESCRIPTION_MAX_SIZE,
            'description': config.get_description('METATAG_DESCRIPTION_MAX_SIZE')
        })
示例#54
0
    def test_can_get_default_violations_values(self):
        config = Config()
        config.BLACKLIST_DOMAIN = ['a.com']

        page = PageFactory.create()

        reviewer = Reviewer(api_url='http://localhost:2368',
                            page_uuid=page.uuid,
                            page_url=page.url,
                            page_score=0.0,
                            config=config,
                            validators=[])

        validator = BlackListValidator(reviewer)

        violations_values = validator.get_default_violations_values(config)

        expect(violations_values).to_include('blacklist.domains')

        expect(violations_values['blacklist.domains']).to_length(2)

        expect(violations_values['blacklist.domains']).to_be_like({
            'value':
            config.BLACKLIST_DOMAIN,
            'description':
            config.get_description('BLACKLIST_DOMAIN')
        })
    def get_reviewer(self,
                     api_url=None,
                     page_uuid=None,
                     page_url='http://page.url',
                     page_score=0.0,
                     config=None):

        if api_url is None:
            api_url = self.get_url('/')

        if page_uuid is None:
            page_uuid = str(uuid4())

        if config is None:
            config = self.config

        return Reviewer(
            api_url=api_url,
            page_uuid=str(page_uuid),
            page_url=page_url,
            page_score=0.0,
            config=config,
            validators=self.validators,
            facters=self.facters,
            search_provider=self.search_provider,
            wait=self.otto.wait,
            wait_timeout=0,  # max time to wait for all requests to finish
            db=self.db,
            cache=self.cache,
            publish=self.publish,
            async_get=self.async_get,
            fact_definitions=self.fact_definitions,
            violation_definitions=self.violation_definitions,
        )
示例#56
0
    def test_get_robots_from_root_domain(self):
        page = PageFactory.create(url="http://www.globo.com")

        reviewer = Reviewer(api_url='http://localhost:2368',
                            page_uuid=page.uuid,
                            page_url=page.url,
                            page_score=0.0,
                            config=Config(),
                            validators=[])
        facter = RobotsFacter(reviewer)
        facter.async_get = Mock()
        facter.add_fact = Mock()
        facter.get_facts()

        robots_url = 'http://www.globo.com/robots.txt'

        expect(facter.review.data).to_length(1)
        expect(facter.review.data['robots.response']).to_equal(None)

        facter.async_get.assert_called_once_with(robots_url,
                                                 facter.handle_robots_loaded)

        response = Mock(status_code=200, text='', headers={})
        facter.handle_robots_loaded(robots_url, response)

        expect(facter.review.data['robots.response']).to_equal(response)

        expect(facter.add_fact.call_args_list).to_include(
            call(
                key='robots.url',
                value=robots_url,
            ))
示例#57
0
    def test_can_validate_last_modified(self):
        page = PageFactory.create()

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            validators=[]
        )

        validator = LastModifiedValidator(reviewer)
        validator.add_violation = Mock()

        validator.review.data = {
            'page.last_modified': None
        }

        validator.review.facts = {
            'page.last_modified': None
        }

        validator.validate()

        validator.add_violation.assert_called_once_with(
            key='page.last_modified.not_found',
            value=page.url,
            points=50
        )