示例#1
0
    def test_can_load_url_with_empy_headers(self):
        """LastModifiedFacter records nothing when the response has no headers."""
        page = PageFactory.create()
        markup = '<html></html>'

        rev = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        # Register the page response on the reviewer, then replace
        # `_wait_for_async_requests` and `save_review` with mocks before
        # handing over a response whose headers dict is empty.
        rev.responses[page.url] = {
            'url': page.url,
            'status': 200,
            'content': markup,
            'html': lxml.html.fromstring(markup),
        }
        rev.save_review = Mock()
        rev._wait_for_async_requests = Mock()
        headerless = Mock(status_code=200, text=markup, headers={})
        rev.content_loaded(page.url, headerless)

        collector = LastModifiedFacter(rev)
        collector.add_fact = Mock()
        collector.get_facts()

        review_data = collector.review.data
        expect(review_data).to_length(0)
        expect(review_data).to_be_like({})
        expect(collector.add_fact.called).to_be_false()
示例#2
0
    def test_can_get_fact_definitions(self):
        """TitleFacter exposes exactly one fact definition, 'page.title'."""
        page = PageFactory.create()

        rev = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        html_text = self.get_file('globo.html')
        stored = {
            'url': page.url,
            'status': 200,
            'content': html_text,
            'html': lxml.html.fromstring(html_text),
        }
        rev.responses[page.url] = stored

        # Both hooks are replaced with mocks before the content is delivered.
        rev._wait_for_async_requests = Mock()
        rev.save_review = Mock()
        loaded = Mock(status_code=200, text=html_text, headers={})
        rev.content_loaded(page.url, loaded)

        fact_definitions = TitleFacter(rev).get_fact_definitions()

        expect(fact_definitions).to_length(1)
        expect('page.title' in fact_definitions).to_be_true()
示例#3
0
    def test_handle_url_loaded(self):
        """handle_url_loaded records the (url, response) pair in 'page.links'."""
        page = PageFactory.create()

        rev = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        body = self.get_file('globo.html')
        rev.responses[page.url] = {
            'url': page.url,
            'status': 200,
            'content': body,
            'html': lxml.html.fromstring(body),
        }
        rev.save_review = Mock()
        rev._wait_for_async_requests = Mock()

        loaded = Mock(status_code=200, text=body, headers={})
        rev.content_loaded(page.url, loaded)

        # async_get is replaced by a mock; the load callback is invoked by
        # hand right after get_facts().
        link_facter = LinkFacter(rev)
        link_facter.async_get = Mock()
        link_facter.get_facts()
        link_facter.handle_url_loaded(page.url, loaded)

        review_data = link_facter.review.data
        expect(review_data).to_include('page.links')
        expect(review_data['page.links']).to_equal({(page.url, loaded)})
示例#4
0
    def test_link_looks_like_image(self):
        """An anchor whose href ends in .png is neither counted nor fetched."""
        page = PageFactory.create(url="http://globo.com/")
        markup = '<html><a href="http://globo.com/metal.png">Metal</a></html>'

        rev = Reviewer(
            api_url="http://localhost:2368",
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )
        rev.responses[page.url] = {
            "url": page.url,
            "status": 200,
            "content": markup,
            "html": lxml.html.fromstring(markup),
        }
        rev.save_review = Mock()
        rev._wait_for_async_requests = Mock()
        loaded = Mock(status_code=200, text=markup, headers={})
        rev.content_loaded(page.url, loaded)

        link_facter = LinkFacter(rev)
        link_facter.add_fact = Mock()
        link_facter.async_get = Mock()
        link_facter.get_facts()

        # Inspect the calls captured by the mocked add_fact.
        recorded = link_facter.add_fact.call_args_list
        expect(recorded).to_include(call(key="page.links", value=set()))
        expect(recorded).to_include(call(key="total.number.links", value=0))

        expect(link_facter.async_get.called).to_be_false()
示例#5
0
    def test_javascript_link(self):
        """Expect LinkFacter to report an empty 'page.links' set and zero links.

        NOTE(review): this copy of the test is damaged. The `Reviewer(` call
        below is never closed and `content` is used without a visible
        assignment; the `api_url` line appears to have been redacted and fused
        with the tail of the original `content` string. Restore the missing
        lines from the upstream source before running.
        """
        page = PageFactory.create()

        # NOTE(review): the next statement is truncated in this copy (see the
        # docstring); the remaining Reviewer arguments and the `content`
        # definition are missing.
        reviewer = Reviewer(
            api_url="http://*****:*****@'+OAS_listpos+'!'+pos+'?'+OAS_query+'\" TARGET='+OAS_target+'>');</script>"

        result = {"url": page.url, "status": 200, "content": content, "html": lxml.html.fromstring(content)}
        reviewer.responses[page.url] = result
        # Replace the async wait and the review save hooks with mocks.
        reviewer._wait_for_async_requests = Mock()
        reviewer.save_review = Mock()
        response = Mock(status_code=200, text=content, headers={})
        reviewer.content_loaded(page.url, response)

        facter = LinkFacter(reviewer)
        facter.add_fact = Mock()

        facter.async_get = Mock()
        facter.get_facts()

        # The mocked add_fact must have been called with an empty link set ...
        expect(facter.add_fact.call_args_list).to_include(call(key="page.links", value=set([])))

        # ... and with a total link count of zero.
        expect(facter.add_fact.call_args_list).to_include(call(key="total.number.links", value=0))
示例#6
0
    def test_no_title_tag(self):
        """TitleFacter adds no fact for a document without a <title> element."""
        page = PageFactory.create()
        bare_html = '<html></html>'

        rev = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        rev.responses[page.url] = {
            'url': page.url,
            'status': 200,
            'content': bare_html,
            'html': lxml.html.fromstring(bare_html),
        }
        rev.save_review = Mock()
        rev._wait_for_async_requests = Mock()
        loaded = Mock(status_code=200, text=bare_html, headers={})
        rev.content_loaded(page.url, loaded)

        title_facter = TitleFacter(rev)
        title_facter.add_fact = Mock()
        title_facter.get_facts()

        # Nothing was reported and the review data stays empty.
        expect(title_facter.add_fact.called).to_be_false()
        expect(title_facter.review.data).to_be_like({})
示例#7
0
    def test_handle_url_loaded(self):
        """After handle_url_loaded, 'page.links' holds the (url, response) pair."""
        page = PageFactory.create()
        fixture_html = self.get_file("globo.html")

        rev = Reviewer(
            api_url="http://localhost:2368",
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        page_entry = {
            "url": page.url,
            "status": 200,
            "content": fixture_html,
            "html": lxml.html.fromstring(fixture_html),
        }
        rev.responses[page.url] = page_entry
        rev.save_review = Mock()
        rev._wait_for_async_requests = Mock()

        fake_response = Mock(status_code=200, text=fixture_html, headers={})
        rev.content_loaded(page.url, fake_response)

        # With async_get mocked out, the callback is driven manually.
        links = LinkFacter(rev)
        links.async_get = Mock()
        links.get_facts()
        links.handle_url_loaded(page.url, fake_response)

        expect(links.review.data).to_include("page.links")
        expected_links = {(page.url, fake_response)}
        expect(links.review.data["page.links"]).to_equal(expected_links)
示例#8
0
    def test_handle_url_loaded_with_empty_content(self):
        """JSFacter reports 0 for both JS size facts when the content is empty."""
        page = PageFactory.create()

        rev = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        empty = ''
        # The empty string is stored as-is under 'html' (it is not parsed).
        rev.responses[page.url] = {
            'url': page.url,
            'status': 200,
            'content': empty,
            'html': empty,
        }
        rev.save_review = Mock()
        rev._wait_for_async_requests = Mock()
        loaded = Mock(status_code=200, text=empty, headers={})
        rev.content_loaded(page.url, loaded)

        js_facter = JSFacter(rev)
        js_facter.async_get = Mock()
        js_facter.get_facts()
        js_facter.handle_url_loaded(page.url, loaded)

        # Plain and gzipped totals must both be present and equal to zero.
        for size_key in ('total.size.js', 'total.size.js.gzipped'):
            expect(js_facter.review.data).to_include(size_key)
            expect(js_facter.review.data[size_key]).to_equal(0)
示例#9
0
    def test_can_get_facts(self):
        """BodyFacter stores the <body> element in review data without add_fact."""
        page = PageFactory.create()
        markup = '<html><body class="test"></body></html>'

        rev = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        rev.responses[page.url] = {
            'url': page.url,
            'status': 200,
            'content': markup,
            'html': lxml.html.fromstring(markup),
        }
        rev.save_review = Mock()
        rev._wait_for_async_requests = Mock()
        loaded = Mock(status_code=200, text=markup, headers={})
        rev.content_loaded(page.url, loaded)

        body_facter = BodyFacter(rev)
        body_facter.add_fact = Mock()
        body_facter.get_facts()

        # Exactly one entry is expected: 'page.body', whose first item is the
        # parsed <body> element.
        review_data = body_facter.review.data
        expect(review_data).to_length(1)
        expect(review_data).to_include('page.body')
        first_body = review_data['page.body'][0]
        expect(first_body.tag).to_equal('body')

        expect(body_facter.add_fact.called).to_be_false()
示例#10
0
    def test_can_get_fact_definitions(self):
        """get_fact_definitions on TitleFacter yields only the 'page.title' key."""
        page = PageFactory.create()
        fixture_html = self.get_file('globo.html')

        rev = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        rev.responses[page.url] = {
            'url': page.url,
            'status': 200,
            'content': fixture_html,
            'html': lxml.html.fromstring(fixture_html),
        }
        # Swap the async wait and save hooks for mocks before loading content.
        rev.save_review = Mock()
        rev._wait_for_async_requests = Mock()
        fake_response = Mock(status_code=200, text=fixture_html, headers={})
        rev.content_loaded(page.url, fake_response)

        title_facter = TitleFacter(rev)
        defs = title_facter.get_fact_definitions()

        expect(defs).to_length(1)
        has_title = 'page.title' in defs
        expect(has_title).to_be_true()
示例#11
0
    def test_can_get_facts(self):
        """MetaTagsFacter reports every meta tag of globo.html as 'meta.tags'."""
        page = PageFactory.create()

        rev = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        fixture_html = self.get_file('globo.html')
        rev.responses[page.url] = {
            'url': page.url,
            'status': 200,
            'content': fixture_html,
            'html': lxml.html.fromstring(fixture_html),
        }
        rev.save_review = Mock()
        rev._wait_for_async_requests = Mock()
        loaded = Mock(status_code=200, text=fixture_html, headers={})
        rev.content_loaded(page.url, loaded)

        meta_facter = MetaTagsFacter(rev)
        meta_facter.add_fact = Mock()
        meta_facter.get_facts()

        def meta(key, prop, content):
            # Build one expected entry in the shape the assertions compare against.
            return {'content': content, 'property': prop, 'key': key}

        # The same description text appears under og:description and description.
        site_description = u'S\xf3 na globo.com voc\xea encontra tudo sobre o conte\xfado e marcas das Organiza\xe7\xf5es Globo. O melhor acervo de v\xeddeos online sobre entretenimento, esportes e jornalismo do Brasil.'

        expected_tags = [
            meta('charset', None, 'utf-8'),
            meta('Content-Type', 'http-equiv', 'text/html;charset=UTF-8'),
            meta('google-site-verification', 'name', 'BKmmuVQac1JM6sKlj3IoXQvffyIRJvJfbicMouA2a88'),
            meta('viewport', 'name', 'width=device-width, initial-scale=1.0, maximum-scale=1.0'),
            meta('og:title', 'property', u'globo.com - Absolutamente tudo sobre not\xedcias, esportes e entretenimento'),
            meta('og:type', 'property', 'website'),
            meta('og:url', 'property', 'http://www.globo.com/'),
            meta('og:image', 'property', 'http://s.glbimg.com/en/ho/static/globocom2012/img/gcom_marca_og.jpg'),
            meta('og:site_name', 'property', 'globo.com'),
            meta('og:description', 'property', site_description),
            meta('fb:page_id', 'property', '224969370851736'),
            meta('description', 'name', site_description),
            meta('keywords', 'name', u'Not\xedcias, Entretenimento, Esporte, Tecnologia, Portal, Conte\xfado, Rede Globo, TV Globo, V\xeddeos, Televis\xe3o'),
            meta('application-name', 'name', 'Globo.com'),
            meta('msapplication-TileColor', 'name', '#0669DE'),
            meta('msapplication-TileImage', 'name', 'http://s.glbimg.com/en/ho/static/globocom2012/img/globo-win-tile.png'),
        ]

        expected_call = call(key='meta.tags', value=expected_tags)
        expect(meta_facter.add_fact.call_args_list).to_include(expected_call)

        review_data = meta_facter.review.data
        expect(review_data).to_length(1)
        expect(review_data).to_include('meta.tags')
        expect(review_data).to_be_like({'meta.tags': expected_tags})
示例#12
0
    def test_invalid_link(self):
        """A malformed href is reported as an invalid link, not as a link."""
        page = PageFactory.create()
        bad_href = 'http://]http://www.globo.com/malhacao'
        markup = '<html><a href="http://]http://www.globo.com/malhacao">blah</a></html>'

        rev = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        rev.responses[page.url] = {
            'url': page.url,
            'status': 200,
            'content': markup,
            'html': lxml.html.fromstring(markup),
        }
        rev.save_review = Mock()
        rev._wait_for_async_requests = Mock()
        loaded = Mock(status_code=200, text=markup, headers={})
        rev.content_loaded(page.url, loaded)

        link_facter = LinkFacter(rev)
        link_facter.add_fact = Mock()
        link_facter.async_get = Mock()
        link_facter.get_facts()

        # Each of these calls must appear among the recorded add_fact calls;
        # they are checked in the listed order.
        expected_calls = (
            call(key='page.links', value=set()),
            call(key='total.number.links', value=0),
            call(key='total.number.invalid_links', value=1),
            call(key='page.invalid_links', value={bad_href}),
        )
        for expected_call in expected_calls:
            expect(link_facter.add_fact.call_args_list).to_include(expected_call)
示例#13
0
    def test_invalid_link(self):
        """LinkFacter counts a malformed href under the invalid-link facts only."""
        page = PageFactory.create()

        rev = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        html_text = '<html><a href="http://]http://www.globo.com/malhacao">blah</a></html>'
        page_entry = {
            'url': page.url,
            'status': 200,
            'content': html_text,
            'html': lxml.html.fromstring(html_text),
        }
        rev.responses[page.url] = page_entry

        # Mock out the reviewer hooks, then deliver the page content.
        rev._wait_for_async_requests = Mock()
        rev.save_review = Mock()
        rev.content_loaded(
            page.url, Mock(status_code=200, text=html_text, headers={}))

        facter = LinkFacter(rev)
        facter.add_fact = Mock()
        facter.async_get = Mock()
        facter.get_facts()

        recorded = facter.add_fact.call_args_list

        # No valid links ...
        expect(recorded).to_include(call(key='page.links', value=set()))
        expect(recorded).to_include(call(key='total.number.links', value=0))

        # ... and exactly one invalid link, kept verbatim.
        expect(recorded).to_include(
            call(key='total.number.invalid_links', value=1))
        expect(recorded).to_include(
            call(key='page.invalid_links',
                 value={'http://]http://www.globo.com/malhacao'}))
示例#14
0
    def test_no_get_url_that_exceed_max_url_level(self):
        """Only one of the three anchors is expected in 'total.number.links'.

        The page holds one shallow link and two links nested 20 and 21 path
        segments deep.
        """
        page = PageFactory.create(url='http://m.com/')

        rev = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        markup = (
            '<html>'
            '<a href="http://m.com/test/">test</a>'
            '<a href="http://m.com/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/">m</a>'
            '<a href="http://m.com/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/">m</a>'
            '</html>'
        )
        rev.responses[page.url] = {
            'url': page.url,
            'status': 200,
            'content': markup,
            'html': lxml.html.fromstring(markup),
        }
        rev.save_review = Mock()
        rev._wait_for_async_requests = Mock()
        loaded = Mock(status_code=200, text=markup, headers={})
        rev.content_loaded(page.url, loaded)

        link_facter = LinkFacter(rev)
        link_facter.add_fact = Mock()
        link_facter.get_facts()

        # The full, ordered list of add_fact calls is compared.
        expected_calls = [
            call(key='page.links', value=set()),
            call(key='total.number.links', value=1),
            call(key='total.number.invalid_links', value=0),
            call(key='page.invalid_links', value=set()),
        ]
        expect(link_facter.add_fact.call_args_list).to_equal(expected_calls)
示例#15
0
    def test_no_get_url_that_exceed_max_url_level(self):
        """Two of the three anchors are expected in 'total.number.links'.

        The page holds one shallow link and two links nested 20 and 21 path
        segments deep.
        """
        page = PageFactory.create(url='http://m.com/')
        html_text = (
            '<html>'
            '<a href="http://m.com/test/">test</a>'
            '<a href="http://m.com/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/">m</a>'
            '<a href="http://m.com/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/">m</a>'
            '</html>'
        )

        rev = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        page_entry = {
            'url': page.url,
            'status': 200,
            'content': html_text,
            'html': lxml.html.fromstring(html_text),
        }
        rev.responses[page.url] = page_entry
        rev._wait_for_async_requests = Mock()
        rev.save_review = Mock()
        rev.content_loaded(
            page.url, Mock(status_code=200, text=html_text, headers={}))

        facter = LinkFacter(rev)
        facter.add_fact = Mock()
        facter.get_facts()

        # add_fact must have been called exactly four times, in this order.
        no_links = call(key='page.links', value=set())
        link_total = call(key='total.number.links', value=2)
        invalid_total = call(key='total.number.invalid_links', value=0)
        no_invalid = call(key='page.invalid_links', value=set())
        expect(facter.add_fact.call_args_list).to_equal(
            [no_links, link_total, invalid_total, no_invalid])
示例#16
0
    def test_not_get_links_with_nofollow(self):
        """Of a plain anchor and a rel="nofollow" anchor, one link is counted."""
        page = PageFactory.create(url='http://m.com/')

        rev = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        markup = (
            '<html>'
            '<a href="http://m.com/test/">test</a>'
            '<a href="http://m.com/metal/" rel="nofollow">metal</a>'
            '</html>'
        )
        rev.responses[page.url] = {
            'url': page.url,
            'status': 200,
            'content': markup,
            'html': lxml.html.fromstring(markup),
        }
        rev.save_review = Mock()
        rev._wait_for_async_requests = Mock()
        loaded = Mock(status_code=200, text=markup, headers={})
        rev.content_loaded(page.url, loaded)

        link_facter = LinkFacter(rev)
        link_facter.add_fact = Mock()
        link_facter.get_facts()

        # The complete, ordered sequence of add_fact calls is asserted.
        expected_calls = [
            call(key='page.links', value=set()),
            call(key='total.number.links', value=1),
            call(key='total.number.invalid_links', value=0),
            call(key='page.invalid_links', value=set()),
        ]
        expect(link_facter.add_fact.call_args_list).to_equal(expected_calls)
示例#17
0
    def test_not_get_links_with_nofollow(self):
        """LinkFacter reports a link total of 1 when one anchor is nofollow."""
        page = PageFactory.create(url='http://m.com/')
        html_text = (
            '<html>'
            '<a href="http://m.com/test/">test</a>'
            '<a href="http://m.com/metal/" rel="nofollow">metal</a>'
            '</html>'
        )

        rev = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        page_entry = {
            'url': page.url,
            'status': 200,
            'content': html_text,
            'html': lxml.html.fromstring(html_text),
        }
        rev.responses[page.url] = page_entry
        rev._wait_for_async_requests = Mock()
        rev.save_review = Mock()
        rev.content_loaded(
            page.url, Mock(status_code=200, text=html_text, headers={}))

        facter = LinkFacter(rev)
        facter.add_fact = Mock()
        facter.get_facts()

        # Exactly these four add_fact calls are expected, in this order.
        no_links = call(key='page.links', value=set())
        link_total = call(key='total.number.links', value=1)
        invalid_total = call(key='total.number.invalid_links', value=0)
        no_invalid = call(key='page.invalid_links', value=set())
        expect(facter.add_fact.call_args_list).to_equal(
            [no_links, link_total, invalid_total, no_invalid])
示例#18
0
    def test_url_ends_with_slash(self):
        """get_facts schedules async_get for the expected my-site.com URLs."""
        page = PageFactory.create()

        rev = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        fixture_html = self.get_file('globo.html')
        rev.responses[page.url] = {
            'url': page.url,
            'status': 200,
            'content': fixture_html,
            'html': lxml.html.fromstring(fixture_html),
        }
        rev.save_review = Mock()
        rev._wait_for_async_requests = Mock()
        loaded = Mock(status_code=200, text=fixture_html, headers={})
        rev.content_loaded(page.url, loaded)

        link_facter = LinkFacter(rev)
        link_facter.add_fact = Mock()
        link_facter.async_get = Mock()
        link_facter.get_facts()

        expect(link_facter.review.data).to_include('page.links')

        # Each URL must have been requested with handle_url_loaded as callback.
        expected_urls = (
            'http://my-site.com',
            'http://my-site.com/privacidade.html',
            'http://my-site.com/todos-os-sites.html',
        )
        for expected_url in expected_urls:
            expect(link_facter.async_get.call_args_list).to_include(
                call(expected_url, link_facter.handle_url_loaded))
示例#19
0
    def test_ignore_base64_images(self):
        """ImageFacter ignores an <img> whose src is a base64 data URI.

        The page contains a single inline ``data:image/png;base64`` image; the
        facter is expected to report an empty image set, a total image size of
        0 and zero image requests.
        """
        page = PageFactory.create()

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[]
        )

        content = '<html><img src="data:image/png;base64,iVBOR" alt="a" title="b" /></html>'

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        # Replace the async wait and review save hooks with mocks.
        reviewer._wait_for_async_requests = Mock()
        reviewer.save_review = Mock()
        response = Mock(status_code=200, text=content, headers={})
        reviewer.content_loaded(page.url, response)

        facter = ImageFacter(reviewer)
        facter.add_fact = Mock()

        facter.async_get = Mock()
        facter.get_facts()

        expect(facter.add_fact.call_args_list).to_include(
            call(
                key='page.images',
                value=set(),
            ))

        # The original statement ended with a stray comma, which wrapped the
        # expectation in a throwaway one-element tuple.
        expect(facter.add_fact.call_args_list).to_include(
            call(
                key='total.size.img',
                value=0,
            ))

        expect(facter.add_fact.call_args_list).to_include(
            call(
                key='total.requests.img',
                value=0,
            ))
示例#20
0
    def test_can_get_facts(self):
        """LinkFacter extracts the links of globo.html and schedules fetches."""
        page = PageFactory.create()
        fixture_html = self.get_file("globo.html")

        rev = Reviewer(
            api_url="http://localhost:2368",
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        rev.responses[page.url] = {
            "url": page.url,
            "status": 200,
            "content": fixture_html,
            "html": lxml.html.fromstring(fixture_html),
        }
        rev.save_review = Mock()
        rev._wait_for_async_requests = Mock()
        loaded = Mock(status_code=200, text=fixture_html, headers={})
        rev.content_loaded(page.url, loaded)

        link_facter = LinkFacter(rev)
        link_facter.add_fact = Mock()
        link_facter.async_get = Mock()
        link_facter.get_facts()

        review_data = link_facter.review.data
        expect(review_data).to_length(2)

        expect(review_data).to_include("page.links")
        expect(review_data["page.links"]).to_equal(set())

        # Spot-check the second collected anchor element.
        expect(review_data).to_include("page.all_links")
        second_link = review_data["page.all_links"][1]
        expect(second_link.tag).to_equal("a")
        expect(second_link.get("href")).to_equal("/")
        expect(second_link.get("title")).to_equal("globo.com")

        expect(link_facter.async_get.call_count).to_equal(335)

        # Facts handed to the mocked add_fact, checked in this order.
        expected_calls = (
            call(key="page.links", value=set()),
            call(key="total.number.links", value=489),
            call(key="total.number.invalid_links", value=0),
            call(key="page.invalid_links", value=set()),
        )
        for expected_call in expected_calls:
            expect(link_facter.add_fact.call_args_list).to_include(expected_call)
示例#21
0
    def test_handle_url_loaded(self):
        """ImageFacter.handle_url_loaded records the image and its size."""
        page = PageFactory.create()
        markup = '<html><img src="test.png" alt="a" title="b" /></html>'

        rev = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        rev.responses[page.url] = {
            'url': page.url,
            'status': 200,
            'content': markup,
            'html': lxml.html.fromstring(markup),
        }
        rev.save_review = Mock()
        rev._wait_for_async_requests = Mock()
        loaded = Mock(status_code=200, text=markup, headers={})
        rev.content_loaded(page.url, loaded)

        # async_get is mocked; the load callback is invoked by hand.
        image_facter = ImageFacter(rev)
        image_facter.async_get = Mock()
        image_facter.get_facts()
        image_facter.handle_url_loaded(page.url, loaded)

        review_data = image_facter.review.data

        expect(review_data).to_include('page.all_images')
        expect(review_data['page.all_images']).not_to_be_null()

        first_image = review_data['page.all_images'][0]
        expect(first_image.get('src')).to_equal('test.png')

        expect(review_data).to_include('page.images')
        expect(review_data['page.images']).to_equal({(page.url, loaded)})

        expect(review_data).to_include('total.size.img')
        expect(review_data['total.size.img']).to_equal(0.0517578125)
    def test_can_get_facts(self):
        """GoogleAnalyticsFacter reports both analytics accounts of globo.html."""
        page = PageFactory.create()

        rev = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        fixture_html = self.get_file('globo.html')
        rev.responses[page.url] = {
            'url': page.url,
            'status': 200,
            'content': fixture_html,
            'html': lxml.html.fromstring(fixture_html),
        }
        rev.save_review = Mock()
        rev._wait_for_async_requests = Mock()
        loaded = Mock(status_code=200, text=fixture_html, headers={})
        rev.content_loaded(page.url, loaded)

        ga_facter = GoogleAnalyticsFacter(rev)
        ga_facter.add_fact = Mock()
        ga_facter.async_get = Mock()
        ga_facter.get_facts()

        expect(ga_facter.review.data).to_length(1)
        expect(ga_facter.review.data).to_include('page.google_analytics')

        # Expected (account, domain) pairs passed to add_fact.
        expected_accounts = {
            ('UA-296593-2', 'www.globo.com'),
            ('UA-296593-15', '.globo.com'),
        }
        expect(ga_facter.add_fact.call_args_list).to_include(
            call(key='page.google_analytics', value=expected_accounts))
示例#23
0
    def test_javascript_link(self):
        """Expect LinkFacter to report an empty 'page.links' set and zero links.

        NOTE(review): this copy of the test is damaged. The `Reviewer(` call
        below is never closed and `content` is used without a visible
        assignment; the `api_url` line appears to have been redacted and fused
        with the tail of the original `content` string. Restore the missing
        lines from the upstream source before running.
        """
        page = PageFactory.create()

        # NOTE(review): the next statement is truncated in this copy (see the
        # docstring); the remaining Reviewer arguments and the `content`
        # definition are missing.
        reviewer = Reviewer(
            api_url='http://*****:*****@\'+OAS_listpos+\'!\'+pos+\'?\'+OAS_query+\'" TARGET=\'+OAS_target+\'>\');</script>'

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        # Replace the async wait and the review save hooks with mocks.
        reviewer._wait_for_async_requests = Mock()
        reviewer.save_review = Mock()
        response = Mock(status_code=200, text=content, headers={})
        reviewer.content_loaded(page.url, response)

        facter = LinkFacter(reviewer)
        facter.add_fact = Mock()

        facter.async_get = Mock()
        facter.get_facts()

        # The mocked add_fact must have been called with an empty link set ...
        expect(facter.add_fact.call_args_list).to_include(
            call(
                key='page.links',
                value=set([])
            ))

        # ... and with a total link count of zero.
        expect(facter.add_fact.call_args_list).to_include(
            call(
                key='total.number.links',
                value=0
            ))
示例#24
0
    def test_can_validate_total_requests_zero_requests(self):
        """With ``<html></html>`` as content, expect one zero-valued fact.

        ``TotalRequestsValidator.validate`` must call ``add_fact`` exactly
        once (``total.requests`` = 0) and must not call ``add_violation``.
        """
        markup = "<html></html>"
        settings = Config()
        target = PageFactory.create()

        runner = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=target.uuid,
            page_url=target.url,
            page_score=0.0,
            config=settings,
            validators=[],
        )

        runner.responses[target.url] = {
            'url': target.url,
            'status': 200,
            'content': markup,
            'html': lxml.html.fromstring(markup),
        }
        # Replace the async-wait and save hooks with Mocks before feeding
        # the response to the reviewer.
        runner.save_review = Mock()
        runner._wait_for_async_requests = Mock()
        runner.content_loaded(
            target.url,
            Mock(status_code=200, text=markup, headers={}),
        )

        checker = TotalRequestsValidator(runner)
        checker.add_violation = Mock()
        checker.add_fact = Mock()

        checker.validate()

        recorded = checker.add_fact.call_args_list
        expect(recorded).to_length(1)
        expect(recorded).to_include(
            call(key='total.requests', value=0, title='Total requests'))

        expect(checker.add_violation.called).to_be_false()
示例#25
0
    def test_javascript_link(self):
        """Expect LinkFacter to report no links for the script content below.

        Asserts that ``add_fact`` received an empty ``page.links`` set and a
        ``total.number.links`` value of 0.
        """
        page = PageFactory.create()

        # NOTE(review): the next statement looks redacted/fused -- the
        # ``*****:*****`` span swallowed the rest of the ``Reviewer(...)``
        # call and the ``content = ...`` assignment, so ``content`` is never
        # bound and the call is never closed. Restore from the upstream file.
        reviewer = Reviewer(
            api_url='http://*****:*****@\'+OAS_listpos+\'!\'+pos+\'?\'+OAS_query+\'" TARGET=\'+OAS_target+\'>\');</script>'

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        # Replace the async-wait and save hooks with Mocks before feeding
        # the response to the reviewer.
        reviewer._wait_for_async_requests = Mock()
        reviewer.save_review = Mock()
        response = Mock(status_code=200, text=content, headers={})
        reviewer.content_loaded(page.url, response)

        facter = LinkFacter(reviewer)
        # Mock add_fact so the reported facts can be inspected afterwards.
        facter.add_fact = Mock()

        facter.async_get = Mock()
        facter.get_facts()

        expect(facter.add_fact.call_args_list).to_include(
            call(
                key='page.links',
                value=set([])
            ))

        expect(facter.add_fact.call_args_list).to_include(
            call(
                key='total.number.links',
                value=0
            ))
示例#26
0
    def test_can_get_facts_deburring_title(self):
        """Expect the ``page.title`` fact to equal the stripped title text.

        The markup wraps a 70-character title in newlines and spaces; the
        fact value is compared against the ``.strip()``-ed element text.
        """
        target = PageFactory.create()

        runner = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=target.uuid,
            page_url=target.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        markup = '<html>  <title>\n    %s\n  </title></html>' % ('a' * 70)
        parsed = lxml.html.fromstring(markup)

        runner.responses[target.url] = {
            'url': target.url,
            'status': 200,
            'content': markup,
            'html': parsed,
        }
        # Replace the async-wait and save hooks with Mocks before feeding
        # the response to the reviewer.
        runner.save_review = Mock()
        runner._wait_for_async_requests = Mock()
        runner.content_loaded(
            target.url,
            Mock(status_code=200, text=markup, headers={}),
        )

        title_facter = TitleFacter(runner)
        title_facter.add_fact = Mock()
        title_facter.get_facts()

        stripped_title = parsed.cssselect('title')[0].text.strip()
        title_facter.add_fact.assert_called_once_with(
            key='page.title', value=stripped_title)

        data = title_facter.review.data
        expect(data).to_length(2)
        for fact_key in ('page.title_count', 'page.title'):
            expect(data).to_include(fact_key)
        expect(data['page.title_count']).to_equal(1)
        expect(data['page.title']).to_length(70)
示例#27
0
    def test_handle_url_loaded(self):
        """Pass one response to ``CSSFacter.handle_url_loaded`` and check data.

        Expects the plain and gzipped CSS size entries plus a ``page.css``
        set holding the ``(url, response)`` pair.
        """
        target = PageFactory.create()

        runner = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=target.uuid,
            page_url=target.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        markup = '<html><link href="a.css" /></html>'

        runner.responses[target.url] = {
            'url': target.url,
            'status': 200,
            'content': markup,
            'html': lxml.html.fromstring(markup),
        }
        # Replace the async-wait and save hooks with Mocks before feeding
        # the response to the reviewer.
        runner.save_review = Mock()
        runner._wait_for_async_requests = Mock()
        reply = Mock(status_code=200, text=markup, headers={})
        runner.content_loaded(target.url, reply)

        css_facter = CSSFacter(runner)
        css_facter.async_get = Mock()
        css_facter.get_facts()

        # The same mocked response doubles as the "loaded stylesheet".
        css_facter.handle_url_loaded(target.url, reply)

        expectations = (
            ('total.size.css', 0.033203125),
            ('total.size.css.gzipped', 0.0380859375),
            ('page.css', {(target.url, reply)}),
        )
        for fact_key, expected in expectations:
            expect(css_facter.review.data).to_include(fact_key)
            expect(css_facter.review.data[fact_key]).to_equal(expected)
示例#28
0
    def test_can_get_facts_deburring_title(self):
        """Check that ``TitleFacter`` reports the title without padding.

        A 70-character title is embedded between newlines and spaces; the
        reported ``page.title`` value must match the stripped element text.
        """
        subject_page = PageFactory.create()

        review_runner = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=subject_page.uuid,
            page_url=subject_page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        padded_title = 'a' * 70
        body = '<html>  <title>\n    %s\n  </title></html>' % padded_title
        tree = lxml.html.fromstring(body)

        review_runner.responses[subject_page.url] = {
            'url': subject_page.url,
            'status': 200,
            'content': body,
            'html': tree,
        }
        # Replace the async-wait and save hooks with Mocks before feeding
        # the response to the reviewer.
        review_runner.save_review = Mock()
        review_runner._wait_for_async_requests = Mock()
        fake_response = Mock(status_code=200, text=body, headers={})
        review_runner.content_loaded(subject_page.url, fake_response)

        facter = TitleFacter(review_runner)
        facter.add_fact = Mock()
        facter.get_facts()

        title_text = tree.cssselect('title')[0].text
        facter.add_fact.assert_called_once_with(
            key='page.title',
            value=title_text.strip(),
        )

        review_data = facter.review.data
        expect(review_data).to_length(2)
        expect(review_data).to_include('page.title_count')
        expect(review_data).to_include('page.title')
        expect(review_data['page.title_count']).to_equal(1)
        expect(review_data['page.title']).to_length(70)
示例#29
0
    def test_can_get_facts(self):
        """Check ``HeadFacter`` stores the ``<head>`` element in review data.

        Expects a single ``page.head`` entry whose first child carries the
        canonical-link attributes, and no call to ``add_fact``.
        """
        target = PageFactory.create()

        runner = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=target.uuid,
            page_url=target.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        markup = '<html><head><link rel="canonical" href="http://my-url.com?item=test" /></head></html>'

        runner.responses[target.url] = {
            'url': target.url,
            'status': 200,
            'content': markup,
            'html': lxml.html.fromstring(markup),
        }
        # Replace the async-wait and save hooks with Mocks before feeding
        # the response to the reviewer.
        runner.save_review = Mock()
        runner._wait_for_async_requests = Mock()
        runner.content_loaded(
            target.url,
            Mock(status_code=200, text=markup, headers={}),
        )

        head_facter = HeadFacter(runner)
        head_facter.add_fact = Mock()
        head_facter.get_facts()

        data = head_facter.review.data
        expect(data).to_length(1)
        expect(data).to_include('page.head')

        head_element = data['page.head'][0]
        expect(head_element.tag).to_equal('head')

        first_child = head_element.getchildren()[0]
        expect(first_child.items()).to_equal(
            [('rel', 'canonical'), ('href', 'http://my-url.com?item=test')])

        expect(head_facter.add_fact.called).to_be_false()
示例#30
0
    def test_can_get_facts(self):
        """Run ``TitleFacter`` over ``globo.html`` and check the title fact.

        Expects one ``add_fact`` call with the page title, and review data
        holding ``page.title`` plus a ``page.title_count`` of 1.
        """
        target = PageFactory.create()

        runner = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=target.uuid,
            page_url=target.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        markup = self.get_file('globo.html')

        runner.responses[target.url] = {
            'url': target.url,
            'status': 200,
            'content': markup,
            'html': lxml.html.fromstring(markup),
        }
        # Replace the async-wait and save hooks with Mocks before feeding
        # the response to the reviewer.
        runner.save_review = Mock()
        runner._wait_for_async_requests = Mock()
        reply = Mock(status_code=200, text=markup, headers={})
        runner.content_loaded(target.url, reply)

        title_facter = TitleFacter(runner)
        title_facter.add_fact = Mock()
        title_facter.get_facts()

        expected_title = (
            u'globo.com - Absolutamente tudo sobre not\xedcias, '
            'esportes e entretenimento'
        )
        title_facter.add_fact.assert_called_once_with(
            key='page.title', value=expected_title)

        data = title_facter.review.data
        expect(data).to_length(2)
        for fact_key in ('page.title_count', 'page.title'):
            expect(data).to_include(fact_key)
        expect(data['page.title_count']).to_equal(1)
示例#31
0
    def test_handle_url_loaded(self):
        """Pass one response to ``JSFacter.handle_url_loaded`` and check data.

        Expects the plain and gzipped JS size entries plus a ``page.js``
        set holding the ``(url, response)`` pair.
        """
        target = PageFactory.create()

        runner = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=target.uuid,
            page_url=target.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        markup = '<script type="text/javascript" src="teste.js"></script>'

        runner.responses[target.url] = {
            'url': target.url,
            'status': 200,
            'content': markup,
            'html': lxml.html.fromstring(markup),
        }
        # Replace the async-wait and save hooks with Mocks before feeding
        # the response to the reviewer.
        runner.save_review = Mock()
        runner._wait_for_async_requests = Mock()
        reply = Mock(status_code=200, text=markup, headers={})
        runner.content_loaded(target.url, reply)

        js_facter = JSFacter(runner)
        js_facter.async_get = Mock()
        js_facter.get_facts()

        # The same mocked response doubles as the "loaded script".
        js_facter.handle_url_loaded(target.url, reply)

        expectations = (
            ('total.size.js', 0.0537109375),
            ('total.size.js.gzipped', 0.05078125),
            ('page.js', {(target.url, reply)}),
        )
        for fact_key, expected in expectations:
            expect(js_facter.review.data).to_include(fact_key)
            expect(js_facter.review.data[fact_key]).to_equal(expected)
示例#32
0
    def test_can_get_facts(self):
        """Check the title fact ``TitleFacter`` reports for ``globo.html``.

        ``add_fact`` must be called exactly once with the page title, and
        the review data must contain ``page.title`` and a count of 1.
        """
        subject_page = PageFactory.create()

        review_runner = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=subject_page.uuid,
            page_url=subject_page.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        body = self.get_file('globo.html')

        review_runner.responses[subject_page.url] = {
            'url': subject_page.url,
            'status': 200,
            'content': body,
            'html': lxml.html.fromstring(body),
        }
        # Replace the async-wait and save hooks with Mocks before feeding
        # the response to the reviewer.
        review_runner.save_review = Mock()
        review_runner._wait_for_async_requests = Mock()
        fake_response = Mock(status_code=200, text=body, headers={})
        review_runner.content_loaded(subject_page.url, fake_response)

        facter = TitleFacter(review_runner)
        facter.add_fact = Mock()
        facter.get_facts()

        title_value = (
            u'globo.com - Absolutamente tudo sobre not\xedcias, '
            'esportes e entretenimento'
        )
        facter.add_fact.assert_called_once_with(
            key='page.title',
            value=title_value,
        )

        review_data = facter.review.data
        expect(review_data).to_length(2)
        expect(review_data).to_include('page.title_count')
        expect(review_data).to_include('page.title')
        expect(review_data['page.title_count']).to_equal(1)
示例#33
0
    def test_can_get_facts(self):
        """Check ``LastModifiedFacter`` stores the ``Last-Modified`` header.

        The response carries a ``Last-Modified`` header; the review data
        must end up with a single ``page.last_modified`` datetime entry.
        """
        target = PageFactory.create()

        runner = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=target.uuid,
            page_url=target.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        markup = '<html></html>'
        response_headers = {'Last-Modified': 'January 13, 2014 1:16:10 PM'}

        runner.responses[target.url] = {
            'url': target.url,
            'status': 200,
            'content': markup,
            'html': lxml.html.fromstring(markup),
        }
        # Replace the async-wait and save hooks with Mocks before feeding
        # the response to the reviewer.
        runner.save_review = Mock()
        runner._wait_for_async_requests = Mock()
        reply = Mock(status_code=200, text=markup, headers=response_headers)
        runner.content_loaded(target.url, reply)

        modified_facter = LastModifiedFacter(runner)
        modified_facter.add_fact = Mock()
        modified_facter.get_facts()

        data = modified_facter.review.data
        expect(data).to_length(1)
        expect(data).to_include('page.last_modified')

        # NOTE(review): the header says 1:16:10 PM while the expected hour
        # below is 1, not 13 -- confirm this is the intended parse result.
        expected_data = {
            'page.last_modified': datetime.datetime(2014, 1, 13, 1, 16, 10),
        }
        expect(data).to_be_like(expected_data)
    def test_can_get_facts(self):
        """Check the analytics pairs ``GoogleAnalyticsFacter`` reports.

        For ``globo.html`` the facter must store one
        ``page.google_analytics`` entry and report the expected set of
        ``(account, domain)`` tuples through ``add_fact``.
        """
        target = PageFactory.create()

        runner = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=target.uuid,
            page_url=target.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        markup = self.get_file('globo.html')

        runner.responses[target.url] = {
            'url': target.url,
            'status': 200,
            'content': markup,
            'html': lxml.html.fromstring(markup),
        }
        # Replace the async-wait and save hooks with Mocks before feeding
        # the response to the reviewer.
        runner.save_review = Mock()
        runner._wait_for_async_requests = Mock()
        reply = Mock(status_code=200, text=markup, headers={})
        runner.content_loaded(target.url, reply)

        ga_facter = GoogleAnalyticsFacter(runner)
        ga_facter.add_fact = Mock()
        ga_facter.async_get = Mock()
        ga_facter.get_facts()

        data = ga_facter.review.data
        expect(data).to_length(1)
        expect(data).to_include('page.google_analytics')

        expected_accounts = {
            ('UA-296593-2', 'www.globo.com'),
            ('UA-296593-15', '.globo.com'),
        }
        expect(ga_facter.add_fact.call_args_list).to_include(
            call(key='page.google_analytics', value=expected_accounts))
示例#35
0
    def test_can_get_facts(self):
        """Check ``BodyFacter`` stores the ``<body>`` element in review data.

        Expects a single ``page.body`` entry whose first item is a ``body``
        element, and no call to ``add_fact``.
        """
        target = PageFactory.create()

        runner = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=target.uuid,
            page_url=target.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        markup = '<html><body class="test"></body></html>'

        runner.responses[target.url] = {
            'url': target.url,
            'status': 200,
            'content': markup,
            'html': lxml.html.fromstring(markup),
        }
        # Replace the async-wait and save hooks with Mocks before feeding
        # the response to the reviewer.
        runner.save_review = Mock()
        runner._wait_for_async_requests = Mock()
        reply = Mock(status_code=200, text=markup, headers={})
        runner.content_loaded(target.url, reply)

        body_facter = BodyFacter(runner)
        body_facter.add_fact = Mock()
        body_facter.get_facts()

        data = body_facter.review.data
        expect(data).to_length(1)
        expect(data).to_include('page.body')

        body_element = data['page.body'][0]
        expect(body_element.tag).to_equal('body')

        expect(body_facter.add_fact.called).to_be_false()
示例#36
0
    def test_handle_url_loaded_with_empty_content(self):
        """Expect zero CSS sizes when the loaded response has empty text."""
        target = PageFactory.create()

        runner = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=target.uuid,
            page_url=target.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        empty_text = ''
        # The 'html' entry is the raw empty string here, not a parsed tree.
        runner.responses[target.url] = {
            'url': target.url,
            'status': 200,
            'content': empty_text,
            'html': empty_text,
        }
        # Replace the async-wait and save hooks with Mocks before feeding
        # the response to the reviewer.
        runner.save_review = Mock()
        runner._wait_for_async_requests = Mock()
        reply = Mock(status_code=200, text=empty_text, headers={})
        runner.content_loaded(target.url, reply)

        css_facter = CSSFacter(runner)
        css_facter.async_get = Mock()
        css_facter.get_facts()

        css_facter.handle_url_loaded(target.url, reply)

        for size_key in ('total.size.css', 'total.size.css.gzipped'):
            expect(css_facter.review.data).to_include(size_key)
            expect(css_facter.review.data[size_key]).to_equal(0)
示例#37
0
    def test_when_content_is_empty(self):
        """Check the ``meta.tags`` fact for a meta tag with empty attributes.

        The markup holds one ``<meta>`` with empty ``test`` and ``property``
        attributes; the reported entry must have ``content`` set to None.
        """
        target = PageFactory.create()

        runner = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=target.uuid,
            page_url=target.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        markup = '<html><meta test="" property=""/></html>'

        runner.responses[target.url] = {
            'url': target.url,
            'status': 200,
            'content': markup,
            'html': lxml.html.fromstring(markup),
        }
        # Replace the async-wait and save hooks with Mocks before feeding
        # the response to the reviewer.
        runner.save_review = Mock()
        runner._wait_for_async_requests = Mock()
        reply = Mock(status_code=200, text=markup, headers={})
        runner.content_loaded(target.url, reply)

        meta_facter = MetaTagsFacter(runner)
        meta_facter.add_fact = Mock()
        meta_facter.get_facts()

        expected_tags = [{'content': None, 'property': 'test', 'key': ''}]
        expect(meta_facter.add_fact.call_args_list).to_include(
            call(key='meta.tags', value=expected_tags))
示例#38
0
    def test_invalid_url(self):
        """Expect empty CSS facts when the stylesheet href is ``http://].css``.

        All reported CSS facts must be zero/empty, and the review data must
        hold zero sizes and an empty ``page.css`` set.
        """
        target = PageFactory.create()

        runner = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=target.uuid,
            page_url=target.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        markup = '<html><link href="http://].css" /></html>'

        runner.responses[target.url] = {
            'url': target.url,
            'status': 200,
            'content': markup,
            'html': lxml.html.fromstring(markup),
        }
        # Replace the async-wait and save hooks with Mocks before feeding
        # the response to the reviewer.
        runner.save_review = Mock()
        runner._wait_for_async_requests = Mock()
        reply = Mock(status_code=200, text=markup, headers={})
        runner.content_loaded(target.url, reply)

        css_facter = CSSFacter(runner)
        css_facter.add_fact = Mock()
        css_facter.async_get = Mock()
        css_facter.get_facts()

        # Facts reported through the mocked add_fact, in assertion order.
        reported_facts = (
            ('page.css', set()),
            ('total.size.css', 0),
            ('total.size.css.gzipped', 0),
            ('total.requests.css', 0),
        )
        for fact_key, fact_value in reported_facts:
            expect(css_facter.add_fact.call_args_list).to_include(
                call(key=fact_key, value=fact_value))

        # Values written straight into the review data.
        stored_values = (
            ('total.size.css', 0),
            ('total.size.css.gzipped', 0),
            ('page.css', set()),
        )
        for data_key, stored in stored_values:
            expect(css_facter.review.data).to_include(data_key)
            expect(css_facter.review.data[data_key]).to_equal(stored)
示例#39
0
    def test_can_get_facts(self):
        """Run ``LinkFacter`` over ``globo.html`` and check its output.

        Verifies the review data entries, the second collected link, the
        number of ``async_get`` calls and the facts passed to ``add_fact``.
        """
        target = PageFactory.create()

        runner = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=target.uuid,
            page_url=target.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        markup = self.get_file('globo.html')

        runner.responses[target.url] = {
            'url': target.url,
            'status': 200,
            'content': markup,
            'html': lxml.html.fromstring(markup),
        }
        # Replace the async-wait and save hooks with Mocks before feeding
        # the response to the reviewer.
        runner.save_review = Mock()
        runner._wait_for_async_requests = Mock()
        reply = Mock(status_code=200, text=markup, headers={})
        runner.content_loaded(target.url, reply)

        link_facter = LinkFacter(runner)
        link_facter.add_fact = Mock()
        link_facter.async_get = Mock()
        link_facter.get_facts()

        data = link_facter.review.data
        expect(data).to_length(2)

        expect(data).to_include('page.links')
        expect(data['page.links']).to_equal(set())

        expect(data).to_include('page.all_links')
        second_link = data['page.all_links'][1]
        expect(second_link.tag).to_equal('a')
        expect(second_link.get('href')).to_equal('/')
        expect(second_link.get('title')).to_equal('globo.com')

        expect(link_facter.async_get.call_count).to_equal(335)

        reported_facts = (
            ('page.links', set()),
            ('total.number.links', 489),
            ('total.number.invalid_links', 0),
            ('page.invalid_links', set()),
        )
        for fact_key, fact_value in reported_facts:
            expect(link_facter.add_fact.call_args_list).to_include(
                call(key=fact_key, value=fact_value))
示例#40
0
    def test_can_get_facts(self):
        """Run ``LinkFacter`` over ``globo.html`` and check facts and fetches.

        Verifies the review data entries, the second collected link, the
        facts passed to ``add_fact`` and three of the URLs handed to
        ``async_get`` with ``handle_url_loaded`` as callback.
        """
        target = PageFactory.create()

        runner = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=target.uuid,
            page_url=target.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        markup = self.get_file('globo.html')

        runner.responses[target.url] = {
            'url': target.url,
            'status': 200,
            'content': markup,
            'html': lxml.html.fromstring(markup),
        }
        # Replace the async-wait and save hooks with Mocks before feeding
        # the response to the reviewer.
        runner.save_review = Mock()
        runner._wait_for_async_requests = Mock()
        reply = Mock(status_code=200, text=markup, headers={})
        runner.content_loaded(target.url, reply)

        link_facter = LinkFacter(runner)
        link_facter.add_fact = Mock()
        link_facter.async_get = Mock()
        link_facter.get_facts()

        data = link_facter.review.data
        expect(data).to_length(2)

        expect(data).to_include('page.links')
        expect(data['page.links']).to_equal(set())

        expect(data).to_include('page.all_links')
        second_link = data['page.all_links'][1]
        expect(second_link.tag).to_equal('a')
        expect(second_link.get('href')).to_equal('/')
        expect(second_link.get('title')).to_equal('globo.com')

        for fact_key, fact_value in (
            ('page.links', set()),
            ('total.number.links', 4),
        ):
            expect(link_facter.add_fact.call_args_list).to_include(
                call(key=fact_key, value=fact_value))

        for fetched_url in (
            'http://my-site.com/privacidade.html',
            'http://my-site.com',
            'http://my-site.com/todos-os-sites.html',
        ):
            expect(link_facter.async_get.call_args_list).to_include(
                call(fetched_url, link_facter.handle_url_loaded))

        for fact_key, fact_value in (
            ('total.number.invalid_links', 0),
            ('page.invalid_links', set()),
        ):
            expect(link_facter.add_fact.call_args_list).to_include(
                call(key=fact_key, value=fact_value))
示例#41
0
    def test_can_get_facts(self):
        """Run ``ImageFacter`` over two ``<img>`` tags and check its output.

        The markup has one image with ``src="test.png"`` and one with an
        empty ``src``. Expects three review data entries, the image facts
        passed to ``add_fact`` and a single ``async_get`` call for
        ``http://my-site.com/test.png``.
        """
        page = PageFactory.create(url='http://my-site.com')

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[]
        )

        content = '<html><img src="test.png" alt="a" title="b" /><img src="" /></html>'

        result = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer.responses[page.url] = result
        # Replace the async-wait and save hooks with Mocks before feeding
        # the response to the reviewer.
        reviewer._wait_for_async_requests = Mock()
        reviewer.save_review = Mock()
        response = Mock(status_code=200, text=content, headers={})
        reviewer.content_loaded(page.url, response)

        facter = ImageFacter(reviewer)
        facter.add_fact = Mock()

        facter.async_get = Mock()
        facter.get_facts()

        expect(facter.review.data).to_length(3)

        expect(facter.review.data).to_include('page.all_images')

        img = facter.review.data['page.all_images'][0]
        expect(img.tag).to_equal('img')
        expect(img.get('src')).to_equal('test.png')

        expect(facter.review.data).to_include('page.images')
        expect(facter.review.data['page.images']).to_equal(set([]))

        expect(facter.review.data).to_include('total.size.img')
        expect(facter.review.data['total.size.img']).to_equal(0)

        expect(facter.add_fact.call_args_list).to_include(
            call(
                key='page.images',
                value=set([]),
            ))

        # The original had a stray trailing comma after this statement,
        # which wrapped the assertion in a discarded 1-tuple.
        expect(facter.add_fact.call_args_list).to_include(
            call(
                key='total.size.img',
                value=0,
            ))

        expect(facter.add_fact.call_args_list).to_include(
            call(
                key='total.requests.img',
                value=1,
            ))

        # Only the image with a non-empty src is expected to be fetched.
        facter.async_get.assert_called_once_with(
            'http://my-site.com/test.png',
            facter.handle_url_loaded
        )
示例#42
0
    def test_can_get_facts(self):
        """Run ``CSSFacter`` over two ``<link>`` tags and check its output.

        The markup links ``a.css`` and ``a.cse``. Expects one CSS request
        to be counted, zero sizes, an empty ``page.css`` set and a single
        ``async_get`` call for ``http://my-site.com/a.css``.
        """
        target = PageFactory.create(url='http://my-site.com')

        runner = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=target.uuid,
            page_url=target.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        markup = '<html><link href="a.css" /><link href="a.cse" /></html>'

        runner.responses[target.url] = {
            'url': target.url,
            'status': 200,
            'content': markup,
            'html': lxml.html.fromstring(markup),
        }
        # Replace the async-wait and save hooks with Mocks before feeding
        # the response to the reviewer.
        runner.save_review = Mock()
        runner._wait_for_async_requests = Mock()
        reply = Mock(status_code=200, text=markup, headers={})
        runner.content_loaded(target.url, reply)

        css_facter = CSSFacter(runner)
        css_facter.add_fact = Mock()
        css_facter.async_get = Mock()
        css_facter.get_facts()

        reported_facts = (
            ('page.css', set()),
            ('total.size.css', 0),
            ('total.size.css.gzipped', 0),
            ('total.requests.css', 1),
        )
        for fact_key, fact_value in reported_facts:
            expect(css_facter.add_fact.call_args_list).to_include(
                call(key=fact_key, value=fact_value))

        expect(css_facter.review.data).to_length(3)

        expected_data = {
            'total.size.css': 0,
            'total.size.css.gzipped': 0,
            'page.css': set(),
        }
        expect(css_facter.review.data).to_be_like(expected_data)

        css_facter.async_get.assert_called_once_with(
            'http://my-site.com/a.css', css_facter.handle_url_loaded)
    def test_can_get_facts(self):
        """Check the heading list ``HeadingHierarchyFacter`` reports.

        For ``globo.html`` the facter must store one
        ``page.heading_hierarchy`` entry and pass the expected ordered list
        of ``(tag, text)`` tuples to ``add_fact``.
        """
        target = PageFactory.create()

        runner = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=target.uuid,
            page_url=target.url,
            page_score=0.0,
            config=Config(),
            facters=[],
        )

        markup = self.get_file('globo.html')

        runner.responses[target.url] = {
            'url': target.url,
            'status': 200,
            'content': markup,
            'html': lxml.html.fromstring(markup),
        }
        # Replace the async-wait and save hooks with Mocks before feeding
        # the response to the reviewer.
        runner.save_review = Mock()
        runner._wait_for_async_requests = Mock()
        reply = Mock(status_code=200, text=markup, headers={})
        runner.content_loaded(target.url, reply)

        heading_facter = HeadingHierarchyFacter(runner)
        heading_facter.add_fact = Mock()
        heading_facter.async_get = Mock()
        heading_facter.get_facts()

        data = heading_facter.review.data
        expect(data).to_length(1)
        expect(data).to_include('page.heading_hierarchy')

        # Expected headings, in the order they are asserted to be reported.
        expected_headings = [
            ('h1', 'globo.com'),
            ('h2', u'ANP: gigantes do petr\xf3leo desistem de leil\xe3o do pr\xe9-sal'),
            ('h2', u'Pol\xedcia conclui que m\xe3e matou as filhas em SP'),
            ('h2', u'Isolados por tempestade no M\xe9xico, 42 brasileiros recorrem \xe0 embaixada'),
            ('h2', 'Show do Sorriso Maroto tem uma morte em tiroteio'),
            ('h2', u'Casal \xe9 morto e crian\xe7as s\xe3o largadas em via'),
            ('h2', 'AO VIVO: Almah e Hibria tocam no Rock in Rio'),
            ('h2', u'SIGA AQUI: f\xe3 da banda Ghost faz homenagem'),
            ('h2', "Thales pede e Nicole aparece em 'Amor'; veja"),
            ('h2', u'Vasco e Tim\xe3o t\xeam puni\xe7\xe3o mudada'),
            ('h2', u'Suposta bronca de Messi gera discuss\xe3o; veja'),
            ('h3', 'rock in rio'),
            ('h2', u'not\xedcias'),
            ('h2', 'esportes'),
            ('h2', 'entretenimento'),
            ('h2', 'tecnologia&games'),
            ('h2', 'moda&beleza'),
            ('h3', u'semana de moda de mil\xe3o'),
            ('h3', u'com delineador, sombra e l\xe1pis'),
            ('h3', 'tudo igual'),
            ('h3', u'veja tamb\xe9m'),
            ('h3', 'GNT'),
            ('h3', 'vogue'),
            ('h2', u'CASA&DECORA\xc7\xc3O'),
            ('h3', 'da madeira ao cristal'),
            ('h3', 'para comer com os olhos'),
            ('h3', 'Gosta de cor nos ambientes'),
            ('h3', u'veja tamb\xe9m'),
            ('h3', 'CASA E JARDIM'),
            ('h3', 'CASA VOGUE'),
            ('h2', 'FAMOSOSFAMOSOS'),
            ('h3', 'ego'),
            ('h3', 'quem'),
            ('h2', u'novelas, s\xe9ries, programas e muito maisnovelas, s\xe9ries, programas e muito mais'),
            ('h3', 'ESTILO TV'),
            ('h3', 'SARAMANDAIA'),
            ('h3', u'AMOR \xc0 VIDA'),
            ('h3', 'SANGUE BOM'),
            ('h3', 'JOIA RARA'),
            ('h2', u'M\xfasica'),
            ('h3', u'm\xfasica.com.br'),
            ('h3', u'g1 m\xfasica'),
            ('h3', 'multishow'),
            ('h3', 'globoradio'),
            ('h3', 'TOP 3 LETRAS'),
            ('h4', ''),
            ('h4', ''),
            ('h4', ''),
            ('h3', u'ENCONTRE LETRAS E TRADU\xc7\xd5ES'),
            ('h3', u'top globot\xe1 todo mundo clicando...'),
            ('h1', 'globo.tv'),
            ('h2', u'servi\xe7os'),
        ]
        expect(heading_facter.add_fact.call_args_list).to_include(
            call(key='page.heading_hierarchy', value=expected_headings))
示例#44
0
    def test_can_get_facts(self):
        """Check the facts JSFacter records for a page with one script tag.

        The page body is a single ``<script src="teste.js">`` element served
        from ``http://my-site.com/``.  With ``add_fact`` and ``async_get``
        mocked out, ``get_facts()`` is expected to:

        * call ``add_fact`` for ``page.js``, ``total.size.js``,
          ``total.size.js.gzipped`` (all empty/zero) and
          ``total.requests.js`` (one request);
        * leave exactly three entries in ``facter.review.data``;
        * request the script once, by its absolute URL, passing
          ``facter.handle_url_loaded`` as the second argument.
        """
        js_page = PageFactory.create(url='http://my-site.com/')
        markup = '<script type="text/javascript" src="teste.js"></script>'

        page_reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=js_page.uuid,
            page_url=js_page.url,
            page_score=0.0,
            config=Config(),
            facters=[]
        )

        # Seed the reviewer with an already-"fetched" response for the page
        # and stub out the collaborators that are replaced by mocks here.
        page_reviewer.responses[js_page.url] = {
            'url': js_page.url,
            'status': 200,
            'content': markup,
            'html': lxml.html.fromstring(markup)
        }
        page_reviewer._wait_for_async_requests = Mock()
        page_reviewer.save_review = Mock()
        page_reviewer.content_loaded(
            js_page.url,
            Mock(status_code=200, text=markup, headers={})
        )

        facter = JSFacter(page_reviewer)
        facter.add_fact = Mock()
        facter.async_get = Mock()
        facter.get_facts()

        # Each (key, value) pair must have been reported through add_fact.
        expected_facts = (
            ('page.js', set()),
            ('total.size.js', 0),
            ('total.size.js.gzipped', 0),
            ('total.requests.js', 1),
        )
        for fact_key, fact_value in expected_facts:
            expect(facter.add_fact.call_args_list).to_include(
                call(key=fact_key, value=fact_value)
            )

        expect(facter.review.data).to_length(3)
        expect(facter.review.data).to_be_like({
            'total.size.js.gzipped': 0,
            'page.js': set(),
            'total.size.js': 0
        })

        # The relative src must have been resolved against the page URL.
        facter.async_get.assert_called_once_with(
            'http://my-site.com/teste.js',
            facter.handle_url_loaded
        )
示例#45
0
    def test_invalid_url(self):
        """Check JSFacter's facts when the page only has a malformed URL.

        The page markup holds a single element pointing at ``http://].js``.
        With ``add_fact`` and ``async_get`` mocked out, ``get_facts()`` is
        expected to report empty/zero values for every JS fact, including
        zero JS requests, and to store the same empty values in
        ``facter.review.data``.

        NOTE(review): the malformed URL is on a ``<link href>`` rather than
        a ``<script src>`` -- confirm this is the element the test intends
        JSFacter to reject.
        """
        page = PageFactory.create()
        content = '<html><link href="http://].js" /></html>'

        reviewer = Reviewer(
            api_url='http://localhost:2368',
            page_uuid=page.uuid,
            page_url=page.url,
            page_score=0.0,
            config=Config(),
            facters=[]
        )

        # Pretend the page was already downloaded, and replace the
        # reviewer's async wait and save hooks with mocks.
        reviewer.responses[page.url] = {
            'url': page.url,
            'status': 200,
            'content': content,
            'html': lxml.html.fromstring(content)
        }
        reviewer._wait_for_async_requests = Mock()
        reviewer.save_review = Mock()
        reviewer.content_loaded(
            page.url,
            Mock(status_code=200, text=content, headers={})
        )

        facter = JSFacter(reviewer)
        facter.add_fact = Mock()
        facter.async_get = Mock()
        facter.get_facts()

        # Facts reported through add_fact: nothing found, nothing requested.
        reported = [
            call(key='page.js', value=set()),
            call(key='total.size.js', value=0),
            call(key='total.size.js.gzipped', value=0),
            call(key='total.requests.js', value=0),
        ]
        for expected_call in reported:
            expect(facter.add_fact.call_args_list).to_include(expected_call)

        # Values written directly into the review data.
        stored = [
            ('total.size.js', 0),
            ('total.size.js.gzipped', 0),
            ('page.js', set()),
        ]
        for data_key, data_value in stored:
            expect(facter.review.data).to_include(data_key)
            expect(facter.review.data[data_key]).to_equal(data_value)