def test_can_load_url_with_empy_headers(self):
    """Asserts LastModifiedFacter stores nothing when the response has no headers."""
    the_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=the_page.uuid,
        page_url=the_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    markup = '<html></html>'
    cached = {
        'url': the_page.url,
        'status': 200,
        'content': markup,
        'html': lxml.html.fromstring(markup),
    }
    rev.responses[the_page.url] = cached

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    headerless_response = Mock(status_code=200, text=markup, headers={})
    rev.content_loaded(the_page.url, headerless_response)

    last_modified = LastModifiedFacter(rev)
    last_modified.add_fact = Mock()
    last_modified.get_facts()

    review_data = last_modified.review.data
    expect(review_data).to_length(0)
    expect(review_data).to_be_like({})
    expect(last_modified.add_fact.called).to_be_false()
def test_can_get_fact_definitions(self):
    """Asserts TitleFacter declares exactly one fact definition, 'page.title'."""
    the_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=the_page.uuid,
        page_url=the_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    markup = self.get_file('globo.html')
    rev.responses[the_page.url] = {
        'url': the_page.url,
        'status': 200,
        'content': markup,
        'html': lxml.html.fromstring(markup),
    }

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    fake_response = Mock(status_code=200, text=markup, headers={})
    rev.content_loaded(the_page.url, fake_response)

    fact_definitions = TitleFacter(rev).get_fact_definitions()

    expect(fact_definitions).to_length(1)
    expect('page.title' in fact_definitions).to_be_true()
def test_handle_url_loaded(self):
    """Asserts LinkFacter.handle_url_loaded records (url, response) in 'page.links'."""
    the_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=the_page.uuid,
        page_url=the_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    markup = self.get_file('globo.html')
    cached = {
        'url': the_page.url,
        'status': 200,
        'content': markup,
        'html': lxml.html.fromstring(markup),
    }
    rev.responses[the_page.url] = cached

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    fake_response = Mock(status_code=200, text=markup, headers={})
    rev.content_loaded(the_page.url, fake_response)

    link_facter = LinkFacter(rev)
    link_facter.async_get = Mock()  # no real fetches for discovered links
    link_facter.get_facts()
    link_facter.handle_url_loaded(the_page.url, fake_response)

    review_data = link_facter.review.data
    expect(review_data).to_include('page.links')

    expected_links = set([(the_page.url, fake_response)])
    expect(review_data['page.links']).to_equal(expected_links)
def test_link_looks_like_image(self):
    """Asserts an anchor pointing at a .png is neither counted nor fetched as a link."""
    the_page = PageFactory.create(url="http://globo.com/")
    rev = Reviewer(
        api_url="http://localhost:2368",
        page_uuid=the_page.uuid,
        page_url=the_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    markup = '<html><a href="http://globo.com/metal.png">Metal</a></html>'
    rev.responses[the_page.url] = {
        "url": the_page.url,
        "status": 200,
        "content": markup,
        "html": lxml.html.fromstring(markup),
    }

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    fake_response = Mock(status_code=200, text=markup, headers={})
    rev.content_loaded(the_page.url, fake_response)

    link_facter = LinkFacter(rev)
    link_facter.add_fact = Mock()
    link_facter.async_get = Mock()
    link_facter.get_facts()

    recorded = link_facter.add_fact.call_args_list
    expect(recorded).to_include(call(key="page.links", value=set([])))
    expect(link_facter.add_fact.call_args_list).to_include(
        call(key="total.number.links", value=0)
    )
    expect(link_facter.async_get.called).to_be_false()
# Checks that LinkFacter reports an empty 'page.links' set and a
# 'total.number.links' of 0 for the script-generated anchor in `content`.
# NOTE(review): everything between `api_url=` and the tail of the `content`
# literal is masked ("*****") in this copy, so the Reviewer(...) call and the
# `content` assignment are incomplete — restore them from the upstream file
# before running this test.
def test_javascript_link(self): page = PageFactory.create() reviewer = Reviewer( api_url="http://*****:*****@'+OAS_listpos+'!'+pos+'?'+OAS_query+'\" TARGET='+OAS_target+'>');</script>" result = {"url": page.url, "status": 200, "content": content, "html": lxml.html.fromstring(content)} reviewer.responses[page.url] = result reviewer._wait_for_async_requests = Mock() reviewer.save_review = Mock() response = Mock(status_code=200, text=content, headers={}) reviewer.content_loaded(page.url, response) facter = LinkFacter(reviewer) facter.add_fact = Mock() facter.async_get = Mock() facter.get_facts() expect(facter.add_fact.call_args_list).to_include(call(key="page.links", value=set([]))) expect(facter.add_fact.call_args_list).to_include(call(key="total.number.links", value=0))
def test_no_title_tag(self):
    """Asserts TitleFacter adds no fact and leaves review data empty without a <title>."""
    the_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=the_page.uuid,
        page_url=the_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    markup = '<html></html>'
    cached = {
        'url': the_page.url,
        'status': 200,
        'content': markup,
        'html': lxml.html.fromstring(markup),
    }
    rev.responses[the_page.url] = cached

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    fake_response = Mock(status_code=200, text=markup, headers={})
    rev.content_loaded(the_page.url, fake_response)

    title_facter = TitleFacter(rev)
    title_facter.add_fact = Mock()
    title_facter.get_facts()

    expect(title_facter.add_fact.called).to_be_false()
    expect(title_facter.review.data).to_be_like({})
def test_handle_url_loaded(self):
    """Asserts LinkFacter.handle_url_loaded stores the loaded pair under 'page.links'."""
    subject_page = PageFactory.create()
    reviewer_args = dict(
        api_url="http://localhost:2368",
        page_uuid=subject_page.uuid,
        page_url=subject_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )
    rev = Reviewer(**reviewer_args)

    body = self.get_file("globo.html")
    rev.responses[subject_page.url] = {
        "url": subject_page.url,
        "status": 200,
        "content": body,
        "html": lxml.html.fromstring(body),
    }

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    loaded = Mock(status_code=200, text=body, headers={})
    rev.content_loaded(subject_page.url, loaded)

    link_facter = LinkFacter(rev)
    link_facter.async_get = Mock()  # no real fetches for discovered links
    link_facter.get_facts()
    link_facter.handle_url_loaded(subject_page.url, loaded)

    expect(link_facter.review.data).to_include("page.links")
    expect(link_facter.review.data["page.links"]).to_equal(
        set([(subject_page.url, loaded)])
    )
def test_handle_url_loaded_with_empty_content(self):
    """Asserts JSFacter reports zero raw and gzipped JS size for an empty response."""
    the_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=the_page.uuid,
        page_url=the_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    empty_body = ''
    # The 'html' entry is the raw empty string here, not a parsed tree.
    rev.responses[the_page.url] = {
        'url': the_page.url,
        'status': 200,
        'content': empty_body,
        'html': empty_body,
    }

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    fake_response = Mock(status_code=200, text=empty_body, headers={})
    rev.content_loaded(the_page.url, fake_response)

    js_facter = JSFacter(rev)
    js_facter.async_get = Mock()
    js_facter.get_facts()
    js_facter.handle_url_loaded(the_page.url, fake_response)

    review_data = js_facter.review.data
    expect(review_data).to_include('total.size.js')
    expect(review_data['total.size.js']).to_equal(0)
    expect(review_data).to_include('total.size.js.gzipped')
    expect(review_data['total.size.js.gzipped']).to_equal(0)
def test_can_get_facts(self):
    """Asserts BodyFacter stores the <body> element in 'page.body' without add_fact."""
    the_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=the_page.uuid,
        page_url=the_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    markup = '<html><body class="test"></body></html>'
    cached = {
        'url': the_page.url,
        'status': 200,
        'content': markup,
        'html': lxml.html.fromstring(markup),
    }
    rev.responses[the_page.url] = cached

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    fake_response = Mock(status_code=200, text=markup, headers={})
    rev.content_loaded(the_page.url, fake_response)

    body_facter = BodyFacter(rev)
    body_facter.add_fact = Mock()
    body_facter.get_facts()

    review_data = body_facter.review.data
    expect(review_data).to_length(1)
    expect(review_data).to_include('page.body')

    first_body = review_data['page.body'][0]
    expect(first_body.tag).to_equal('body')
    expect(body_facter.add_fact.called).to_be_false()
def test_can_get_fact_definitions(self):
    """Asserts TitleFacter.get_fact_definitions returns only the 'page.title' entry."""
    subject_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=subject_page.uuid,
        page_url=subject_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    body = self.get_file('globo.html')
    parsed = lxml.html.fromstring(body)
    rev.responses[subject_page.url] = {
        'url': subject_page.url,
        'status': 200,
        'content': body,
        'html': parsed,
    }

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    loaded = Mock(status_code=200, text=body, headers={})
    rev.content_loaded(subject_page.url, loaded)

    title_facter = TitleFacter(rev)
    defs = title_facter.get_fact_definitions()

    expect(defs).to_length(1)
    expect('page.title' in defs).to_be_true()
# Checks that MetaTagsFacter, run against the 'globo.html' fixture, calls
# add_fact with key 'meta.tags' and the `values` list below, and that
# review.data holds exactly that one 'meta.tags' entry.
# NOTE(review): the u'...' literal for the 'description' entry is split across
# a physical line break in this copy (after "Globo. "); a character may have
# been lost at that point — confirm the exact string against the fixture
# before reformatting this test.
def test_can_get_facts(self): page = PageFactory.create() reviewer = Reviewer( api_url='http://localhost:2368', page_uuid=page.uuid, page_url=page.url, page_score=0.0, config=Config(), facters=[] ) content = self.get_file('globo.html') result = { 'url': page.url, 'status': 200, 'content': content, 'html': lxml.html.fromstring(content) } reviewer.responses[page.url] = result reviewer._wait_for_async_requests = Mock() reviewer.save_review = Mock() response = Mock(status_code=200, text=content, headers={}) reviewer.content_loaded(page.url, response) facter = MetaTagsFacter(reviewer) facter.add_fact = Mock() facter.get_facts() values = [{'content': 'utf-8', 'property': None, 'key': 'charset'}, {'content': 'text/html;charset=UTF-8', 'property': 'http-equiv', 'key': 'Content-Type'}, {'content': 'BKmmuVQac1JM6sKlj3IoXQvffyIRJvJfbicMouA2a88', 'property': 'name', 'key': 'google-site-verification'}, {'content': 'width=device-width, initial-scale=1.0, maximum-scale=1.0', 'property': 'name', 'key': 'viewport'}, {'content': u'globo.com - Absolutamente tudo sobre not\xedcias, esportes e entretenimento', 'property': 'property', 'key': 'og:title'}, {'content': 'website', 'property': 'property', 'key': 'og:type'}, {'content': 'http://www.globo.com/', 'property': 'property', 'key': 'og:url'}, {'content': 'http://s.glbimg.com/en/ho/static/globocom2012/img/gcom_marca_og.jpg', 'property': 'property', 'key': 'og:image'}, {'content': 'globo.com', 'property': 'property', 'key': 'og:site_name'}, {'content': u'S\xf3 na globo.com voc\xea encontra tudo sobre o conte\xfado e marcas das Organiza\xe7\xf5es Globo. O melhor acervo de v\xeddeos online sobre entretenimento, esportes e jornalismo do Brasil.', 'property': 'property', 'key': 'og:description'}, {'content': '224969370851736', 'property': 'property', 'key': 'fb:page_id'}, {'content': u'S\xf3 na globo.com voc\xea encontra tudo sobre o conte\xfado e marcas das Organiza\xe7\xf5es Globo. 
O melhor acervo de v\xeddeos online sobre entretenimento, esportes e jornalismo do Brasil.', 'property': 'name', 'key': 'description'}, {'content': u'Not\xedcias, Entretenimento, Esporte, Tecnologia, Portal, Conte\xfado, Rede Globo, TV Globo, V\xeddeos, Televis\xe3o', 'property': 'name', 'key': 'keywords'}, {'content': 'Globo.com', 'property': 'name', 'key': 'application-name'}, {'content': '#0669DE', 'property': 'name', 'key': 'msapplication-TileColor'}, {'content': 'http://s.glbimg.com/en/ho/static/globocom2012/img/globo-win-tile.png', 'property': 'name', 'key': 'msapplication-TileImage'}] expect(facter.add_fact.call_args_list).to_include( call( key='meta.tags', value=values, )) expect(facter.review.data).to_length(1) expect(facter.review.data).to_include('meta.tags') expect(facter.review.data).to_be_like({'meta.tags': values})
def test_invalid_link(self):
    """Asserts a malformed href is reported as an invalid link, not a regular one."""
    the_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=the_page.uuid,
        page_url=the_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    markup = '<html><a href="http://]http://www.globo.com/malhacao">blah</a></html>'
    rev.responses[the_page.url] = {
        'url': the_page.url,
        'status': 200,
        'content': markup,
        'html': lxml.html.fromstring(markup),
    }

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    fake_response = Mock(status_code=200, text=markup, headers={})
    rev.content_loaded(the_page.url, fake_response)

    link_facter = LinkFacter(rev)
    link_facter.add_fact = Mock()
    link_facter.async_get = Mock()
    link_facter.get_facts()

    # Each expectation is checked in turn so the first missing fact is the
    # one reported.
    expected_calls = (
        call(key='page.links', value=set([])),
        call(key='total.number.links', value=0),
        call(key='total.number.invalid_links', value=1),
        call(
            key='page.invalid_links',
            value=set(['http://]http://www.globo.com/malhacao']),
        ),
    )
    for expected in expected_calls:
        expect(link_facter.add_fact.call_args_list).to_include(expected)
def test_no_get_url_that_exceed_max_url_level(self):
    """Asserts LinkFacter emits exactly four facts, counting one link, for deep URLs."""
    the_page = PageFactory.create(url='http://m.com/')
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=the_page.uuid,
        page_url=the_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    # One shallow link plus two links nested 20 and 21 path levels deep.
    anchors = (
        '<a href="http://m.com/test/">test</a>'
        '<a href="http://m.com/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/">m</a>'
        '<a href="http://m.com/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/">m</a>'
    )
    markup = '<html>' + anchors + '</html>'

    rev.responses[the_page.url] = {
        'url': the_page.url,
        'status': 200,
        'content': markup,
        'html': lxml.html.fromstring(markup),
    }

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    fake_response = Mock(status_code=200, text=markup, headers={})
    rev.content_loaded(the_page.url, fake_response)

    link_facter = LinkFacter(rev)
    link_facter.add_fact = Mock()
    link_facter.get_facts()

    expected_calls = [
        call(key='page.links', value=set([])),
        call(key='total.number.links', value=1),
        call(key='total.number.invalid_links', value=0),
        call(key='page.invalid_links', value=set([])),
    ]
    expect(link_facter.add_fact.call_args_list).to_equal(expected_calls)
def test_no_get_url_that_exceed_max_url_level(self):
    """Asserts LinkFacter emits exactly four facts, counting two links, for deep URLs."""
    subject_page = PageFactory.create(url='http://m.com/')
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=subject_page.uuid,
        page_url=subject_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    # One shallow link plus two links nested 20 and 21 path levels deep.
    body = ''.join([
        '<html>',
        '<a href="http://m.com/test/">test</a>',
        '<a href="http://m.com/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/">m</a>',
        '<a href="http://m.com/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/">m</a>',
        '</html>',
    ])

    cached = {
        'url': subject_page.url,
        'status': 200,
        'content': body,
        'html': lxml.html.fromstring(body),
    }
    rev.responses[subject_page.url] = cached

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    loaded = Mock(status_code=200, text=body, headers={})
    rev.content_loaded(subject_page.url, loaded)

    link_facter = LinkFacter(rev)
    link_facter.add_fact = Mock()
    link_facter.get_facts()

    expect(link_facter.add_fact.call_args_list).to_equal([
        call(key='page.links', value=set([])),
        call(key='total.number.links', value=2),
        call(key='total.number.invalid_links', value=0),
        call(key='page.invalid_links', value=set([])),
    ])
def test_not_get_links_with_nofollow(self):
    """Asserts only one of two anchors is counted when the other is rel="nofollow"."""
    the_page = PageFactory.create(url='http://m.com/')
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=the_page.uuid,
        page_url=the_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    markup = (
        '<html>'
        '<a href="http://m.com/test/">test</a>'
        '<a href="http://m.com/metal/" rel="nofollow">metal</a>'
        '</html>'
    )
    rev.responses[the_page.url] = {
        'url': the_page.url,
        'status': 200,
        'content': markup,
        'html': lxml.html.fromstring(markup),
    }

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    fake_response = Mock(status_code=200, text=markup, headers={})
    rev.content_loaded(the_page.url, fake_response)

    link_facter = LinkFacter(rev)
    link_facter.add_fact = Mock()
    link_facter.get_facts()

    expected_calls = [
        call(key='page.links', value=set([])),
        call(key='total.number.links', value=1),
        call(key='total.number.invalid_links', value=0),
        call(key='page.invalid_links', value=set([])),
    ]
    expect(link_facter.add_fact.call_args_list).to_equal(expected_calls)
def test_url_ends_with_slash(self):
    """Asserts LinkFacter schedules async fetches for the expected my-site.com URLs."""
    the_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=the_page.uuid,
        page_url=the_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    markup = self.get_file('globo.html')
    rev.responses[the_page.url] = {
        'url': the_page.url,
        'status': 200,
        'content': markup,
        'html': lxml.html.fromstring(markup),
    }

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    fake_response = Mock(status_code=200, text=markup, headers={})
    rev.content_loaded(the_page.url, fake_response)

    link_facter = LinkFacter(rev)
    link_facter.add_fact = Mock()
    link_facter.async_get = Mock()
    link_facter.get_facts()

    expect(link_facter.review.data).to_include('page.links')

    # Every expected URL must have been requested with the facter's own
    # handle_url_loaded as the callback.
    expected_urls = (
        'http://my-site.com',
        'http://my-site.com/privacidade.html',
        'http://my-site.com/todos-os-sites.html',
    )
    for expected_url in expected_urls:
        expect(link_facter.async_get.call_args_list).to_include(
            call(expected_url, link_facter.handle_url_loaded)
        )
def test_ignore_base64_images(self):
    """Asserts ImageFacter reports no images, size or requests for a data: URI <img>."""
    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    content = '<html><img src="data:image/png;base64,iVBOR" alt="a" title="b" /></html>'
    reviewer.responses[page.url] = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content),
    }

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    reviewer._wait_for_async_requests = Mock()
    reviewer.save_review = Mock()

    response = Mock(status_code=200, text=content, headers={})
    reviewer.content_loaded(page.url, response)

    facter = ImageFacter(reviewer)
    facter.add_fact = Mock()
    facter.async_get = Mock()
    facter.get_facts()

    expect(facter.add_fact.call_args_list).to_include(
        call(key='page.images', value=set([]))
    )
    # The original statement ended with a stray comma, which wrapped the
    # expectation in a throwaway one-element tuple; it is a plain statement now.
    expect(facter.add_fact.call_args_list).to_include(
        call(key='total.size.img', value=0)
    )
    expect(facter.add_fact.call_args_list).to_include(
        call(key='total.requests.img', value=0)
    )
def test_can_get_facts(self):
    """Asserts the link facts LinkFacter produces for the 'globo.html' fixture."""
    the_page = PageFactory.create()
    rev = Reviewer(
        api_url="http://localhost:2368",
        page_uuid=the_page.uuid,
        page_url=the_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    markup = self.get_file("globo.html")
    cached = {
        "url": the_page.url,
        "status": 200,
        "content": markup,
        "html": lxml.html.fromstring(markup),
    }
    rev.responses[the_page.url] = cached

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    fake_response = Mock(status_code=200, text=markup, headers={})
    rev.content_loaded(the_page.url, fake_response)

    link_facter = LinkFacter(rev)
    link_facter.add_fact = Mock()
    link_facter.async_get = Mock()
    link_facter.get_facts()

    review_data = link_facter.review.data
    expect(review_data).to_length(2)
    expect(review_data).to_include("page.links")
    expect(review_data["page.links"]).to_equal(set([]))
    expect(review_data).to_include("page.all_links")

    # Spot-check the second anchor collected from the fixture.
    second_link = review_data["page.all_links"][1]
    expect(second_link.tag).to_equal("a")
    expect(second_link.get("href")).to_equal("/")
    expect(second_link.get("title")).to_equal("globo.com")

    expect(link_facter.async_get.call_count).to_equal(335)

    for expected in (
        call(key="page.links", value=set([])),
        call(key="total.number.links", value=489),
        call(key="total.number.invalid_links", value=0),
        call(key="page.invalid_links", value=set([])),
    ):
        expect(link_facter.add_fact.call_args_list).to_include(expected)
def test_handle_url_loaded(self):
    """Asserts ImageFacter.handle_url_loaded records the image and its size."""
    the_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=the_page.uuid,
        page_url=the_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    markup = '<html><img src="test.png" alt="a" title="b" /></html>'
    rev.responses[the_page.url] = {
        'url': the_page.url,
        'status': 200,
        'content': markup,
        'html': lxml.html.fromstring(markup),
    }

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    fake_response = Mock(status_code=200, text=markup, headers={})
    rev.content_loaded(the_page.url, fake_response)

    image_facter = ImageFacter(rev)
    image_facter.async_get = Mock()
    image_facter.get_facts()
    # The page response doubles as the "image" response here.
    image_facter.handle_url_loaded(the_page.url, fake_response)

    review_data = image_facter.review.data
    expect(review_data).to_include('page.all_images')
    expect(review_data['page.all_images']).not_to_be_null()

    first_src = review_data['page.all_images'][0].get('src')
    expect(first_src).to_equal('test.png')

    expect(review_data).to_include('page.images')
    expected_images = set([(the_page.url, fake_response)])
    expect(review_data['page.images']).to_equal(expected_images)

    expect(review_data).to_include('total.size.img')
    expect(review_data['total.size.img']).to_equal(0.0517578125)
def test_can_get_facts(self):
    """Asserts GoogleAnalyticsFacter extracts both account/domain pairs from the fixture."""
    the_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=the_page.uuid,
        page_url=the_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    markup = self.get_file('globo.html')
    cached = {
        'url': the_page.url,
        'status': 200,
        'content': markup,
        'html': lxml.html.fromstring(markup),
    }
    rev.responses[the_page.url] = cached

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    fake_response = Mock(status_code=200, text=markup, headers={})
    rev.content_loaded(the_page.url, fake_response)

    ga_facter = GoogleAnalyticsFacter(rev)
    ga_facter.add_fact = Mock()
    ga_facter.async_get = Mock()
    ga_facter.get_facts()

    expect(ga_facter.review.data).to_length(1)
    expect(ga_facter.review.data).to_include('page.google_analytics')

    expected_accounts = set([
        ('UA-296593-2', 'www.globo.com'),
        ('UA-296593-15', '.globo.com'),
    ])
    expect(ga_facter.add_fact.call_args_list).to_include(
        call(key='page.google_analytics', value=expected_accounts)
    )
# Checks that LinkFacter reports an empty 'page.links' set and a
# 'total.number.links' of 0 for the script-generated anchor in `content`.
# NOTE(review): everything between `api_url=` and the tail of the `content`
# literal is masked ("*****") in this copy, so the Reviewer(...) call and the
# `content` assignment are incomplete — restore them from the upstream file
# before running this test.
def test_javascript_link(self): page = PageFactory.create() reviewer = Reviewer( api_url='http://*****:*****@\'+OAS_listpos+\'!\'+pos+\'?\'+OAS_query+\'" TARGET=\'+OAS_target+\'>\');</script>' result = { 'url': page.url, 'status': 200, 'content': content, 'html': lxml.html.fromstring(content) } reviewer.responses[page.url] = result reviewer._wait_for_async_requests = Mock() reviewer.save_review = Mock() response = Mock(status_code=200, text=content, headers={}) reviewer.content_loaded(page.url, response) facter = LinkFacter(reviewer) facter.add_fact = Mock() facter.async_get = Mock() facter.get_facts() expect(facter.add_fact.call_args_list).to_include( call( key='page.links', value=set([]) )) expect(facter.add_fact.call_args_list).to_include( call( key='total.number.links', value=0 ))
def test_can_validate_total_requests_zero_requests(self):
    """Asserts TotalRequestsValidator adds a single zero 'total.requests' fact, no violation."""
    cfg = Config()
    the_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=the_page.uuid,
        page_url=the_page.url,
        page_score=0.0,
        config=cfg,
        validators=[],
    )

    markup = "<html></html>"
    rev.responses[the_page.url] = {
        'url': the_page.url,
        'status': 200,
        'content': markup,
        'html': lxml.html.fromstring(markup),
    }

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    fake_response = Mock(status_code=200, text=markup, headers={})
    rev.content_loaded(the_page.url, fake_response)

    requests_validator = TotalRequestsValidator(rev)
    requests_validator.add_fact = Mock()
    requests_validator.add_violation = Mock()
    requests_validator.validate()

    recorded_facts = requests_validator.add_fact.call_args_list
    expect(recorded_facts).to_length(1)
    expect(recorded_facts).to_include(
        call(key='total.requests', value=0, title='Total requests')
    )
    expect(requests_validator.add_violation.called).to_be_false()
def test_can_get_facts_deburring_title(self):
    """Asserts TitleFacter reports the title with surrounding whitespace stripped."""
    the_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=the_page.uuid,
        page_url=the_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    # A 70-character title padded with newlines and spaces inside <title>.
    long_title = 'a' * 70
    markup = '<html> <title>\n %s\n </title></html>' % long_title
    parsed = lxml.html.fromstring(markup)

    rev.responses[the_page.url] = {
        'url': the_page.url,
        'status': 200,
        'content': markup,
        'html': parsed,
    }

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    fake_response = Mock(status_code=200, text=markup, headers={})
    rev.content_loaded(the_page.url, fake_response)

    title_facter = TitleFacter(rev)
    title_facter.add_fact = Mock()
    title_facter.get_facts()

    stripped_title = parsed.cssselect('title')[0].text.strip()
    title_facter.add_fact.assert_called_once_with(
        key='page.title',
        value=stripped_title,
    )

    review_data = title_facter.review.data
    expect(review_data).to_length(2)
    expect(review_data).to_include('page.title_count')
    expect(review_data).to_include('page.title')
    expect(review_data['page.title_count']).to_equal(1)
    expect(review_data['page.title']).to_length(70)
def test_handle_url_loaded(self):
    """Asserts CSSFacter.handle_url_loaded records the stylesheet and its sizes."""
    the_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=the_page.uuid,
        page_url=the_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    markup = '<html><link href="a.css" /></html>'
    rev.responses[the_page.url] = {
        'url': the_page.url,
        'status': 200,
        'content': markup,
        'html': lxml.html.fromstring(markup),
    }

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    fake_response = Mock(status_code=200, text=markup, headers={})
    rev.content_loaded(the_page.url, fake_response)

    css_facter = CSSFacter(rev)
    css_facter.async_get = Mock()
    css_facter.get_facts()
    # The page response doubles as the "stylesheet" response here.
    css_facter.handle_url_loaded(the_page.url, fake_response)

    review_data = css_facter.review.data
    expect(review_data).to_include('total.size.css')
    expect(review_data['total.size.css']).to_equal(0.033203125)

    expect(review_data).to_include('total.size.css.gzipped')
    expect(review_data['total.size.css.gzipped']).to_equal(0.0380859375)

    expect(review_data).to_include('page.css')
    expected_css = set([(the_page.url, fake_response)])
    expect(review_data['page.css']).to_equal(expected_css)
def test_can_get_facts_deburring_title(self):
    """Asserts TitleFacter passes the whitespace-stripped title to add_fact."""
    subject_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=subject_page.uuid,
        page_url=subject_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    # A 70-character title padded with newlines and spaces inside <title>.
    padded_title = 'a' * 70
    body = '<html> <title>\n %s\n </title></html>' % padded_title
    tree = lxml.html.fromstring(body)

    cached = {
        'url': subject_page.url,
        'status': 200,
        'content': body,
        'html': tree,
    }
    rev.responses[subject_page.url] = cached

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    loaded = Mock(status_code=200, text=body, headers={})
    rev.content_loaded(subject_page.url, loaded)

    title_facter = TitleFacter(rev)
    title_facter.add_fact = Mock()
    title_facter.get_facts()

    title_node = tree.cssselect('title')[0]
    title_facter.add_fact.assert_called_once_with(
        key='page.title',
        value=title_node.text.strip(),
    )

    expect(title_facter.review.data).to_length(2)
    for fact_key in ('page.title_count', 'page.title'):
        expect(title_facter.review.data).to_include(fact_key)
    expect(title_facter.review.data['page.title_count']).to_equal(1)
    expect(title_facter.review.data['page.title']).to_length(70)
def test_can_get_facts(self):
    """Asserts HeadFacter stores the <head> element in 'page.head' without add_fact."""
    the_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=the_page.uuid,
        page_url=the_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    markup = '<html><head><link rel="canonical" href="http://my-url.com?item=test" /></head></html>'
    rev.responses[the_page.url] = {
        'url': the_page.url,
        'status': 200,
        'content': markup,
        'html': lxml.html.fromstring(markup),
    }

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    fake_response = Mock(status_code=200, text=markup, headers={})
    rev.content_loaded(the_page.url, fake_response)

    head_facter = HeadFacter(rev)
    head_facter.add_fact = Mock()
    head_facter.get_facts()

    review_data = head_facter.review.data
    expect(review_data).to_length(1)
    expect(review_data).to_include('page.head')

    head_element = review_data['page.head'][0]
    expect(head_element.tag).to_equal('head')

    # The only child of <head> is the canonical <link>; check its attributes.
    canonical_attrs = [
        ('rel', 'canonical'),
        ('href', 'http://my-url.com?item=test'),
    ]
    expect(head_element.getchildren()[0].items()).to_equal(canonical_attrs)
    expect(head_facter.add_fact.called).to_be_false()
def test_can_get_facts(self):
    """Asserts TitleFacter reports the fixture's title and a title count of 1."""
    the_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=the_page.uuid,
        page_url=the_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    markup = self.get_file('globo.html')
    rev.responses[the_page.url] = {
        'url': the_page.url,
        'status': 200,
        'content': markup,
        'html': lxml.html.fromstring(markup),
    }

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    fake_response = Mock(status_code=200, text=markup, headers={})
    rev.content_loaded(the_page.url, fake_response)

    title_facter = TitleFacter(rev)
    title_facter.add_fact = Mock()
    title_facter.get_facts()

    expected_title = (
        u'globo.com - Absolutamente tudo sobre not\xedcias, '
        'esportes e entretenimento'
    )
    title_facter.add_fact.assert_called_once_with(
        key='page.title',
        value=expected_title,
    )

    review_data = title_facter.review.data
    expect(review_data).to_length(2)
    expect(review_data).to_include('page.title_count')
    expect(review_data).to_include('page.title')
    expect(review_data['page.title_count']).to_equal(1)
def test_handle_url_loaded(self):
    """Asserts JSFacter.handle_url_loaded records the script and its sizes."""
    the_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=the_page.uuid,
        page_url=the_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    markup = '<script type="text/javascript" src="teste.js"></script>'
    rev.responses[the_page.url] = {
        'url': the_page.url,
        'status': 200,
        'content': markup,
        'html': lxml.html.fromstring(markup),
    }

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    fake_response = Mock(status_code=200, text=markup, headers={})
    rev.content_loaded(the_page.url, fake_response)

    js_facter = JSFacter(rev)
    js_facter.async_get = Mock()
    js_facter.get_facts()
    # The page response doubles as the "script" response here.
    js_facter.handle_url_loaded(the_page.url, fake_response)

    review_data = js_facter.review.data
    expect(review_data).to_include('total.size.js')
    expect(review_data['total.size.js']).to_equal(0.0537109375)

    expect(review_data).to_include('total.size.js.gzipped')
    expect(review_data['total.size.js.gzipped']).to_equal(0.05078125)

    expect(review_data).to_include('page.js')
    expected_js = set([(the_page.url, fake_response)])
    expect(review_data['page.js']).to_equal(expected_js)
def test_can_get_facts(self):
    """Asserts TitleFacter calls add_fact once with the fixture's page title."""
    subject_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=subject_page.uuid,
        page_url=subject_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    body = self.get_file('globo.html')
    cached = {
        'url': subject_page.url,
        'status': 200,
        'content': body,
        'html': lxml.html.fromstring(body),
    }
    rev.responses[subject_page.url] = cached

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    loaded = Mock(status_code=200, text=body, headers={})
    rev.content_loaded(subject_page.url, loaded)

    title_facter = TitleFacter(rev)
    title_facter.add_fact = Mock()
    title_facter.get_facts()

    title_facter.add_fact.assert_called_once_with(
        key='page.title',
        value=(
            u'globo.com - Absolutamente tudo sobre not\xedcias, '
            'esportes e entretenimento'
        ),
    )

    expect(title_facter.review.data).to_length(2)
    for fact_key in ('page.title_count', 'page.title'):
        expect(title_facter.review.data).to_include(fact_key)
    expect(title_facter.review.data['page.title_count']).to_equal(1)
def test_can_get_facts(self):
    """Asserts LastModifiedFacter stores the Last-Modified header as 'page.last_modified'."""
    the_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=the_page.uuid,
        page_url=the_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    markup = '<html></html>'
    response_headers = {'Last-Modified': 'January 13, 2014 1:16:10 PM'}
    rev.responses[the_page.url] = {
        'url': the_page.url,
        'status': 200,
        'content': markup,
        'html': lxml.html.fromstring(markup),
    }

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    fake_response = Mock(status_code=200, text=markup, headers=response_headers)
    rev.content_loaded(the_page.url, fake_response)

    last_modified = LastModifiedFacter(rev)
    last_modified.add_fact = Mock()
    last_modified.get_facts()

    review_data = last_modified.review.data
    expect(review_data).to_length(1)
    expect(review_data).to_include('page.last_modified')

    # NOTE(review): the header says "1:16:10 PM" but the expected hour is 1;
    # presumably the facter's parser ignores the AM/PM marker — confirm.
    expected_data = {
        'page.last_modified': datetime.datetime(2014, 1, 13, 1, 16, 10),
    }
    expect(review_data).to_be_like(expected_data)
def test_can_get_facts(self):
    """Asserts GoogleAnalyticsFacter reports the fixture's two analytics accounts."""
    subject_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=subject_page.uuid,
        page_url=subject_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    body = self.get_file('globo.html')
    rev.responses[subject_page.url] = {
        'url': subject_page.url,
        'status': 200,
        'content': body,
        'html': lxml.html.fromstring(body),
    }

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    loaded = Mock(status_code=200, text=body, headers={})
    rev.content_loaded(subject_page.url, loaded)

    ga_facter = GoogleAnalyticsFacter(rev)
    ga_facter.add_fact = Mock()
    ga_facter.async_get = Mock()
    ga_facter.get_facts()

    review_data = ga_facter.review.data
    expect(review_data).to_length(1)
    expect(review_data).to_include('page.google_analytics')

    expected_call = call(
        key='page.google_analytics',
        value=set([
            ('UA-296593-2', 'www.globo.com'),
            ('UA-296593-15', '.globo.com'),
        ]),
    )
    expect(ga_facter.add_fact.call_args_list).to_include(expected_call)
def test_can_get_facts(self):
    """Asserts BodyFacter exposes the page's <body> under 'page.body' only."""
    subject_page = PageFactory.create()
    rev = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=subject_page.uuid,
        page_url=subject_page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    body = '<html><body class="test"></body></html>'
    rev.responses[subject_page.url] = {
        'url': subject_page.url,
        'status': 200,
        'content': body,
        'html': lxml.html.fromstring(body),
    }

    # Stub out the parts of the reviewer that would wait on I/O or persist.
    rev._wait_for_async_requests = Mock()
    rev.save_review = Mock()

    loaded = Mock(status_code=200, text=body, headers={})
    rev.content_loaded(subject_page.url, loaded)

    body_facter = BodyFacter(rev)
    body_facter.add_fact = Mock()
    body_facter.get_facts()

    expect(body_facter.review.data).to_length(1)
    expect(body_facter.review.data).to_include('page.body')

    body_elements = body_facter.review.data['page.body']
    expect(body_elements[0].tag).to_equal('body')
    expect(body_facter.add_fact.called).to_be_false()
def test_handle_url_loaded_with_empty_content(self):
    # With an empty response body, both CSS size facts must be present in
    # the review data and equal to zero after handle_url_loaded runs.
    page = PageFactory.create()
    empty_body = ''

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    # The 'html' entry is the raw empty string here, not a parsed document.
    reviewer.responses[page.url] = {
        'url': page.url,
        'status': 200,
        'content': empty_body,
        'html': empty_body,
    }
    reviewer._wait_for_async_requests = Mock()
    reviewer.save_review = Mock()

    fake_response = Mock(status_code=200, text=empty_body, headers={})
    reviewer.content_loaded(page.url, fake_response)

    facter = CSSFacter(reviewer)
    facter.async_get = Mock()
    facter.get_facts()
    facter.handle_url_loaded(page.url, fake_response)

    for size_key in ('total.size.css', 'total.size.css.gzipped'):
        expect(facter.review.data).to_include(size_key)
        expect(facter.review.data[size_key]).to_equal(0)
def test_when_content_is_empty(self):
    # A <meta> tag whose attributes are all empty strings must still be
    # reported through add_fact, with its content recorded as None.
    page = PageFactory.create()
    html_text = '<html><meta test="" property=""/></html>'

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    reviewer.responses[page.url] = {
        'url': page.url,
        'status': 200,
        'content': html_text,
        'html': lxml.html.fromstring(html_text),
    }
    reviewer._wait_for_async_requests = Mock()
    reviewer.save_review = Mock()

    fake_response = Mock(status_code=200, text=html_text, headers={})
    reviewer.content_loaded(page.url, fake_response)

    facter = MetaTagsFacter(reviewer)
    facter.add_fact = Mock()
    facter.get_facts()

    expected_tags = [{'content': None, 'property': 'test', 'key': ''}]
    expect(facter.add_fact.call_args_list).to_include(
        call(key='meta.tags', value=expected_tags)
    )
def test_invalid_url(self):
    # A <link> whose href is the malformed URL 'http://].css' must produce
    # empty/zero CSS facts.
    page = PageFactory.create()
    html_text = '<html><link href="http://].css" /></html>'

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    reviewer.responses[page.url] = {
        'url': page.url,
        'status': 200,
        'content': html_text,
        'html': lxml.html.fromstring(html_text),
    }
    reviewer._wait_for_async_requests = Mock()
    reviewer.save_review = Mock()

    fake_response = Mock(status_code=200, text=html_text, headers={})
    reviewer.content_loaded(page.url, fake_response)

    facter = CSSFacter(reviewer)
    facter.add_fact = Mock()
    facter.async_get = Mock()
    facter.get_facts()

    # Facts reported through add_fact, checked in the original order.
    expected_facts = [
        ('page.css', set()),
        ('total.size.css', 0),
        ('total.size.css.gzipped', 0),
        ('total.requests.css', 0),
    ]
    for fact_key, fact_value in expected_facts:
        expect(facter.add_fact.call_args_list).to_include(
            call(key=fact_key, value=fact_value)
        )

    # Values stored directly in the review data.
    expected_data = [
        ('total.size.css', 0),
        ('total.size.css.gzipped', 0),
        ('page.css', set()),
    ]
    for data_key, data_value in expected_data:
        expect(facter.review.data).to_include(data_key)
        expect(facter.review.data[data_key]).to_equal(data_value)
def test_can_get_facts(self):
    # LinkFacter run against the globo.html fixture: checks the stored link
    # data, the number of async fetches scheduled and the reported facts.
    page = PageFactory.create()

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    page_source = self.get_file('globo.html')
    reviewer.responses[page.url] = {
        'url': page.url,
        'status': 200,
        'content': page_source,
        'html': lxml.html.fromstring(page_source),
    }
    reviewer._wait_for_async_requests = Mock()
    reviewer.save_review = Mock()

    fake_response = Mock(status_code=200, text=page_source, headers={})
    reviewer.content_loaded(page.url, fake_response)

    facter = LinkFacter(reviewer)
    facter.add_fact = Mock()
    facter.async_get = Mock()
    facter.get_facts()

    expect(facter.review.data).to_length(2)
    expect(facter.review.data).to_include('page.links')
    expect(facter.review.data['page.links']).to_equal(set())
    expect(facter.review.data).to_include('page.all_links')

    # Spot-check the second collected anchor element.
    second_link = facter.review.data['page.all_links'][1]
    expect(second_link.tag).to_equal('a')
    expect(second_link.get('href')).to_equal('/')
    expect(second_link.get('title')).to_equal('globo.com')

    expect(facter.async_get.call_count).to_equal(335)

    expected_facts = [
        ('page.links', set()),
        ('total.number.links', 489),
        ('total.number.invalid_links', 0),
        ('page.invalid_links', set()),
    ]
    for fact_key, fact_value in expected_facts:
        expect(facter.add_fact.call_args_list).to_include(
            call(key=fact_key, value=fact_value)
        )
def test_can_get_facts(self):
    # LinkFacter: checks the stored link data, the URLs scheduled for async
    # fetching and the facts reported through add_fact.
    # NOTE(review): the fixture is globo.html and the page uses the factory's
    # default URL, yet the expectations below mention my-site.com and a link
    # count of 4 -- confirm the intended fixture / page URL.
    page = PageFactory.create()

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    page_source = self.get_file('globo.html')
    reviewer.responses[page.url] = {
        'url': page.url,
        'status': 200,
        'content': page_source,
        'html': lxml.html.fromstring(page_source),
    }
    reviewer._wait_for_async_requests = Mock()
    reviewer.save_review = Mock()

    fake_response = Mock(status_code=200, text=page_source, headers={})
    reviewer.content_loaded(page.url, fake_response)

    facter = LinkFacter(reviewer)
    facter.add_fact = Mock()
    facter.async_get = Mock()
    facter.get_facts()

    expect(facter.review.data).to_length(2)
    expect(facter.review.data).to_include('page.links')
    expect(facter.review.data['page.links']).to_equal(set())
    expect(facter.review.data).to_include('page.all_links')

    # Spot-check the second collected anchor element.
    second_link = facter.review.data['page.all_links'][1]
    expect(second_link.tag).to_equal('a')
    expect(second_link.get('href')).to_equal('/')
    expect(second_link.get('title')).to_equal('globo.com')

    for fact_key, fact_value in (
        ('page.links', set()),
        ('total.number.links', 4),
    ):
        expect(facter.add_fact.call_args_list).to_include(
            call(key=fact_key, value=fact_value)
        )

    # Every expected URL must have been queued with the facter's own
    # callback.
    for queued_url in (
        'http://my-site.com/privacidade.html',
        'http://my-site.com',
        'http://my-site.com/todos-os-sites.html',
    ):
        expect(facter.async_get.call_args_list).to_include(
            call(queued_url, facter.handle_url_loaded)
        )

    for fact_key, fact_value in (
        ('total.number.invalid_links', 0),
        ('page.invalid_links', set()),
    ):
        expect(facter.add_fact.call_args_list).to_include(
            call(key=fact_key, value=fact_value)
        )
def test_can_get_facts(self):
    # ImageFacter on a page with one usable <img> and one with an empty src:
    # checks the stored image data, the reported facts and that only the
    # usable image is queued for async fetching.
    page = PageFactory.create(url='http://my-site.com')

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    content = (
        '<html><img src="test.png" alt="a" title="b" /><img src="" /></html>'
    )
    reviewer.responses[page.url] = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content),
    }
    reviewer._wait_for_async_requests = Mock()
    reviewer.save_review = Mock()

    response = Mock(status_code=200, text=content, headers={})
    reviewer.content_loaded(page.url, response)

    facter = ImageFacter(reviewer)
    facter.add_fact = Mock()
    facter.async_get = Mock()
    facter.get_facts()

    expect(facter.review.data).to_length(3)
    expect(facter.review.data).to_include('page.all_images')

    img = facter.review.data['page.all_images'][0]
    expect(img.tag).to_equal('img')
    expect(img.get('src')).to_equal('test.png')

    expect(facter.review.data).to_include('page.images')
    expect(facter.review.data['page.images']).to_equal(set())

    expect(facter.review.data).to_include('total.size.img')
    expect(facter.review.data['total.size.img']).to_equal(0)

    expect(facter.add_fact.call_args_list).to_include(
        call(key='page.images', value=set())
    )
    # The original statement ended with a stray comma, which wrapped the
    # assertion in a discarded one-element tuple; it is a plain statement now.
    expect(facter.add_fact.call_args_list).to_include(
        call(key='total.size.img', value=0)
    )
    expect(facter.add_fact.call_args_list).to_include(
        call(key='total.requests.img', value=1)
    )

    facter.async_get.assert_called_once_with(
        'http://my-site.com/test.png',
        facter.handle_url_loaded,
    )
def test_can_get_facts(self):
    # CSSFacter on a page with two <link> tags ('a.css' and 'a.cse'):
    # exactly one request is counted and only 'a.css' is queued for fetching.
    page = PageFactory.create(url='http://my-site.com')
    html_text = '<html><link href="a.css" /><link href="a.cse" /></html>'

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    reviewer.responses[page.url] = {
        'url': page.url,
        'status': 200,
        'content': html_text,
        'html': lxml.html.fromstring(html_text),
    }
    reviewer._wait_for_async_requests = Mock()
    reviewer.save_review = Mock()

    fake_response = Mock(status_code=200, text=html_text, headers={})
    reviewer.content_loaded(page.url, fake_response)

    facter = CSSFacter(reviewer)
    facter.add_fact = Mock()
    facter.async_get = Mock()
    facter.get_facts()

    expected_facts = [
        ('page.css', set()),
        ('total.size.css', 0),
        ('total.size.css.gzipped', 0),
        ('total.requests.css', 1),
    ]
    for fact_key, fact_value in expected_facts:
        expect(facter.add_fact.call_args_list).to_include(
            call(key=fact_key, value=fact_value)
        )

    expect(facter.review.data).to_length(3)
    expect(facter.review.data).to_be_like({
        'total.size.css': 0,
        'total.size.css.gzipped': 0,
        'page.css': set(),
    })

    facter.async_get.assert_called_once_with(
        'http://my-site.com/a.css',
        facter.handle_url_loaded,
    )
def test_can_get_facts(self):
    # HeadingHierarchyFacter run against the globo.html fixture must report
    # the full ordered list of (tag, text) heading pairs through add_fact.
    page = PageFactory.create()

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    content = self.get_file('globo.html')
    reviewer.responses[page.url] = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content),
    }
    reviewer._wait_for_async_requests = Mock()
    reviewer.save_review = Mock()

    response = Mock(status_code=200, text=content, headers={})
    reviewer.content_loaded(page.url, response)

    facter = HeadingHierarchyFacter(reviewer)
    facter.add_fact = Mock()
    facter.async_get = Mock()
    facter.get_facts()

    expect(facter.review.data).to_length(1)
    expect(facter.review.data).to_include('page.heading_hierarchy')

    # Expected headings in the order listed by the original test. The second
    # u'veja tamb\xe9m' entry was previously split across two physical lines
    # (an unterminated string literal); it is rejoined here.
    expected_hierarchy = [
        ('h1', 'globo.com'),
        ('h2', u'ANP: gigantes do petr\xf3leo desistem de leil\xe3o do pr\xe9-sal'),
        ('h2', u'Pol\xedcia conclui que m\xe3e matou as filhas em SP'),
        ('h2', u'Isolados por tempestade no M\xe9xico, 42 brasileiros recorrem \xe0 embaixada'),
        ('h2', 'Show do Sorriso Maroto tem uma morte em tiroteio'),
        ('h2', u'Casal \xe9 morto e crian\xe7as s\xe3o largadas em via'),
        ('h2', 'AO VIVO: Almah e Hibria tocam no Rock in Rio'),
        ('h2', u'SIGA AQUI: f\xe3 da banda Ghost faz homenagem'),
        ('h2', "Thales pede e Nicole aparece em 'Amor'; veja"),
        ('h2', u'Vasco e Tim\xe3o t\xeam puni\xe7\xe3o mudada'),
        ('h2', u'Suposta bronca de Messi gera discuss\xe3o; veja'),
        ('h3', 'rock in rio'),
        ('h2', u'not\xedcias'),
        ('h2', 'esportes'),
        ('h2', 'entretenimento'),
        ('h2', 'tecnologia&games'),
        ('h2', 'moda&beleza'),
        ('h3', u'semana de moda de mil\xe3o'),
        ('h3', u'com delineador, sombra e l\xe1pis'),
        ('h3', 'tudo igual'),
        ('h3', u'veja tamb\xe9m'),
        ('h3', 'GNT'),
        ('h3', 'vogue'),
        ('h2', u'CASA&DECORA\xc7\xc3O'),
        ('h3', 'da madeira ao cristal'),
        ('h3', 'para comer com os olhos'),
        ('h3', 'Gosta de cor nos ambientes'),
        ('h3', u'veja tamb\xe9m'),
        ('h3', 'CASA E JARDIM'),
        ('h3', 'CASA VOGUE'),
        ('h2', 'FAMOSOSFAMOSOS'),
        ('h3', 'ego'),
        ('h3', 'quem'),
        ('h2', u'novelas, s\xe9ries, programas e muito maisnovelas, s\xe9ries, programas e muito mais'),
        ('h3', 'ESTILO TV'),
        ('h3', 'SARAMANDAIA'),
        ('h3', u'AMOR \xc0 VIDA'),
        ('h3', 'SANGUE BOM'),
        ('h3', 'JOIA RARA'),
        ('h2', u'M\xfasica'),
        ('h3', u'm\xfasica.com.br'),
        ('h3', u'g1 m\xfasica'),
        ('h3', 'multishow'),
        ('h3', 'globoradio'),
        ('h3', 'TOP 3 LETRAS'),
        ('h4', ''),
        ('h4', ''),
        ('h4', ''),
        ('h3', u'ENCONTRE LETRAS E TRADU\xc7\xd5ES'),
        ('h3', u'top globot\xe1 todo mundo clicando...'),
        ('h1', 'globo.tv'),
        ('h2', u'servi\xe7os'),
    ]

    expect(facter.add_fact.call_args_list).to_include(
        call(key='page.heading_hierarchy', value=expected_hierarchy)
    )
def test_can_get_facts(self):
    # JSFacter on a page with a single external <script>: one request is
    # counted and the script URL, resolved against the page URL, is queued
    # for async fetching.
    page = PageFactory.create(url='http://my-site.com/')
    html_text = '<script type="text/javascript" src="teste.js"></script>'

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    reviewer.responses[page.url] = {
        'url': page.url,
        'status': 200,
        'content': html_text,
        'html': lxml.html.fromstring(html_text),
    }
    reviewer._wait_for_async_requests = Mock()
    reviewer.save_review = Mock()

    fake_response = Mock(status_code=200, text=html_text, headers={})
    reviewer.content_loaded(page.url, fake_response)

    facter = JSFacter(reviewer)
    facter.add_fact = Mock()
    facter.async_get = Mock()
    facter.get_facts()

    expected_facts = [
        ('page.js', set()),
        ('total.size.js', 0),
        ('total.size.js.gzipped', 0),
        ('total.requests.js', 1),
    ]
    for fact_key, fact_value in expected_facts:
        expect(facter.add_fact.call_args_list).to_include(
            call(key=fact_key, value=fact_value)
        )

    expect(facter.review.data).to_length(3)
    expect(facter.review.data).to_be_like({
        'total.size.js.gzipped': 0,
        'page.js': set(),
        'total.size.js': 0,
    })

    facter.async_get.assert_called_once_with(
        'http://my-site.com/teste.js',
        facter.handle_url_loaded,
    )
def test_invalid_url(self):
    # A page whose only resource reference is the malformed URL
    # 'http://].js' must produce empty/zero JS facts.
    page = PageFactory.create()
    html_text = '<html><link href="http://].js" /></html>'

    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        facters=[],
    )

    reviewer.responses[page.url] = {
        'url': page.url,
        'status': 200,
        'content': html_text,
        'html': lxml.html.fromstring(html_text),
    }
    reviewer._wait_for_async_requests = Mock()
    reviewer.save_review = Mock()

    fake_response = Mock(status_code=200, text=html_text, headers={})
    reviewer.content_loaded(page.url, fake_response)

    facter = JSFacter(reviewer)
    facter.add_fact = Mock()
    facter.async_get = Mock()
    facter.get_facts()

    # Facts reported through add_fact, checked in the original order.
    expected_facts = [
        ('page.js', set()),
        ('total.size.js', 0),
        ('total.size.js.gzipped', 0),
        ('total.requests.js', 0),
    ]
    for fact_key, fact_value in expected_facts:
        expect(facter.add_fact.call_args_list).to_include(
            call(key=fact_key, value=fact_value)
        )

    # Values stored directly in the review data.
    expected_data = [
        ('total.size.js', 0),
        ('total.size.js.gzipped', 0),
        ('page.js', set()),
    ]
    for data_key, data_value in expected_data:
        expect(facter.review.data).to_include(data_key)
        expect(facter.review.data[data_key]).to_equal(data_value)