def test_can_get_violation_count_and_points(self):
    domain = DomainFactory.create()
    domain2 = DomainFactory.create()
    DomainFactory.create()

    page = PageFactory.create(domain=domain)
    page2 = PageFactory.create(domain=domain)
    page3 = PageFactory.create(domain=domain2)

    ReviewFactory.create(domain=domain, page=page, is_active=True, number_of_violations=20)
    ReviewFactory.create(domain=domain, page=page2, is_active=True, number_of_violations=10)
    ReviewFactory.create(domain=domain2, page=page3, is_active=True, number_of_violations=30)

    violation_count = domain.get_violation_data(self.db)

    expect(violation_count).to_equal(30)

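# Note on the expectation above: get_violation_data sums only the active
# reviews that belong to `domain` (20 + 10 = 30); domain2's 30-violation
# review is excluded from the aggregate.
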
def test_can_decrement_next_jobs_count(self):
    self.db.query(Page).delete()
    self.db.query(Domain).delete()

    key = 'next-jobs'
    self.sync_cache.redis.delete(key)

    globocom = DomainFactory.create(
        url="http://globo.com",
        name="globo.com",
        is_active=True
    )

    for i in range(2):
        PageFactory.create(domain=globocom)

    self.sync_cache.increment_next_jobs_count(-2)
    page_count = self.sync_cache.redis.get(key)
    expect(page_count).to_equal('0')

    # should get from cache
    self.sync_cache.db = None
    self.sync_cache.increment_next_jobs_count(10)
    page_count = self.sync_cache.redis.get(key)
    expect(page_count).to_equal('10')

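# The first increment_next_jobs_count(-2) seeds the counter from the database
# (2 eligible pages) and applies the delta, leaving '0'. With sync_cache.db
# detached, the second call can only adjust the existing Redis key:
# 0 + 10 = '10', proving the value no longer depends on the database.
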
def test_can_get_domain_details(self):
    domain = DomainFactory.create(url="http://www.domain-details.com", name="domain-details.com")

    page = PageFactory.create(domain=domain, url=domain.url)
    page2 = PageFactory.create(domain=domain)

    ReviewFactory.create(page=page, is_active=True, is_complete=True, number_of_violations=20)
    ReviewFactory.create(page=page2, is_active=True, is_complete=True, number_of_violations=30)

    response = yield self.authenticated_fetch('/domains/%s/' % domain.name)
    expect(response.code).to_equal(200)

    domain_details = loads(response.body)

    expect(domain_details['name']).to_equal('domain-details.com')
    expect(domain_details['pageCount']).to_equal(2)
    expect(domain_details['reviewCount']).to_equal(2)
    expect(domain_details['violationCount']).to_equal(50)
    expect(domain_details['reviewPercentage']).to_equal(100.00)
    expect(domain_details['errorPercentage']).to_equal(0)
    expect(domain_details['averageResponseTime']).to_equal(0)
    expect(domain_details['is_active']).to_be_true()
    expect(domain_details['homepageId']).to_equal(str(page.uuid))
    expect(domain_details['homepageReviewId']).to_equal(str(page.last_review_uuid))

def test_get_count(self):
    key = 'g.com-my-key'
    self.sync_cache.redis.delete(key)

    gcom = DomainFactory.create(url="http://g.com", name="g.com")
    PageFactory.create(domain=gcom)

    count = self.sync_cache.get_count(
        key,
        gcom.name,
        int(self.config.PAGE_COUNT_EXPIRATION_IN_SECONDS),
        lambda domain: domain.get_page_count(self.db)
    )
    expect(count).to_equal(1)

    # should get from cache
    self.sync_cache.db = None
    count = self.sync_cache.get_count(
        key,
        gcom.name,
        int(self.config.PAGE_COUNT_EXPIRATION_IN_SECONDS),
        lambda domain: domain.get_page_count(self.db)
    )
    expect(count).to_equal(1)

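# get_count follows a cache-aside pattern: the first call misses, computes the
# value by running the callback against the domain, and stores it under `key`
# with the given TTL. Detaching sync_cache.db before the second call proves
# that the repeated read is served from Redis alone.
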
def test_increment_active_review_count(self):
    key = 'g.com-active-review-count'
    self.cache.redis.delete(key)

    gcom = DomainFactory.create(url='http://g.com', name='g.com')

    page = PageFactory.create(domain=gcom)
    ReviewFactory.create(
        is_active=True, is_complete=True, domain=gcom,
        page=page, number_of_violations=1
    )

    page = PageFactory.create(domain=gcom)
    ReviewFactory.create(
        is_active=False, is_complete=True, domain=gcom,
        page=page, number_of_violations=3
    )

    page_count = yield self.cache.get_active_review_count('g.com')
    expect(page_count).to_equal(1)

    yield self.cache.increment_active_review_count('g.com')
    page_count = yield self.cache.get_active_review_count('g.com')
    expect(page_count).to_equal(2)

def test_increment_active_review_count(self):
    key = 'g.com-active-review-count'
    self.sync_cache.redis.delete(key)

    gcom = DomainFactory.create(url='http://g.com', name='g.com')

    page = PageFactory.create(domain=gcom)
    ReviewFactory.create(
        is_active=True, is_complete=True, domain=gcom,
        page=page, number_of_violations=1
    )

    page = PageFactory.create(domain=gcom)
    ReviewFactory.create(
        is_active=False, is_complete=True, domain=gcom,
        page=page, number_of_violations=3
    )

    self.sync_cache.increment_active_review_count(gcom.name)
    active_review_count = self.sync_cache.redis.get(key)
    expect(active_review_count).to_equal('1')

    self.sync_cache.increment_active_review_count(gcom.name)
    active_review_count = self.sync_cache.redis.get(key)
    expect(active_review_count).to_equal('2')

def test_can_get_domain_details(self):
    self.clean_cache('domain-details.com')

    domain = DomainFactory.create(url="http://www.domain-details.com", name="domain-details.com")

    page = PageFactory.create(domain=domain)
    page2 = PageFactory.create(domain=domain)

    ReviewFactory.create(page=page, is_active=True, is_complete=True, number_of_violations=20)
    ReviewFactory.create(page=page2, is_active=True, is_complete=True, number_of_violations=30)

    response = yield self.http_client.fetch(
        self.get_url('/domains/%s/' % domain.name)
    )
    expect(response.code).to_equal(200)

    domain_details = loads(response.body)

    expect(domain_details['name']).to_equal('domain-details.com')
    expect(domain_details['pageCount']).to_equal(2)
    expect(domain_details['reviewCount']).to_equal(2)
    expect(domain_details['violationCount']).to_equal(50)
    expect(domain_details['reviewPercentage']).to_equal(100.00)
    expect(domain_details['statusCodeInfo']).to_equal([])
    expect(domain_details['errorPercentage']).to_equal(0)
    expect(domain_details['averageResponseTime']).to_equal(0)
    expect(domain_details['is_active']).to_be_true()

def test_can_get_last_reviews_count_in_last_hour_filter_by_domain(self):
    dt = datetime.utcnow()

    domain1 = DomainFactory.create()
    domain2 = DomainFactory.create()

    page1 = PageFactory.create(domain=domain1)
    page2 = PageFactory.create(domain=domain2)

    ReviewFactory.create(is_active=True, completed_date=dt - timedelta(minutes=1), page=page1)
    ReviewFactory.create(is_active=True, completed_date=dt - timedelta(minutes=59), page=page1)
    ReviewFactory.create(is_active=True, completed_date=dt - timedelta(minutes=1), page=page2)
    ReviewFactory.create(is_active=True, completed_date=dt - timedelta(minutes=59), page=page2)
    self.db.flush()

    response = yield self.authenticated_fetch(
        '/reviews-in-last-hour?domain_filter=%s' % domain1.name)
    expect(response.code).to_equal(200)

    result = loads(response.body)
    expect(result['count']).to_equal(2)

def test_can_get_domain_reviews(self):
    dt = datetime(2010, 11, 12, 13, 14, 15)
    dt_timestamp = calendar.timegm(dt.utctimetuple())

    dt2 = datetime(2011, 12, 13, 14, 15, 16)
    dt2_timestamp = calendar.timegm(dt2.utctimetuple())

    domain = DomainFactory.create(url="http://www.domain-details.com", name="domain-details.com")

    page = PageFactory.create(domain=domain, last_review_date=dt)
    page2 = PageFactory.create(domain=domain, last_review_date=dt2)

    ReviewFactory.create(page=page, is_active=True, is_complete=True, completed_date=dt, number_of_violations=20)
    ReviewFactory.create(page=page, is_active=False, is_complete=True, completed_date=dt2, number_of_violations=30)
    ReviewFactory.create(page=page2, is_active=True, is_complete=True, completed_date=dt2, number_of_violations=30)
    ReviewFactory.create(page=page2, is_active=False, is_complete=True, completed_date=dt, number_of_violations=20)

    response = yield self.http_client.fetch(
        self.get_url('/domains/%s/reviews/' % domain.name)
    )
    expect(response.code).to_equal(200)

    domain_details = loads(response.body)

    expect(domain_details['pages']).to_length(2)

    expect(domain_details['pages'][1]['url']).to_equal(page2.url)
    expect(domain_details['pages'][1]['uuid']).to_equal(str(page2.uuid))
    expect(domain_details['pages'][1]['completedAt']).to_equal(dt2_timestamp)

    expect(domain_details['pages'][0]['url']).to_equal(page.url)
    expect(domain_details['pages'][0]['uuid']).to_equal(str(page.uuid))
    expect(domain_details['pages'][0]['completedAt']).to_equal(dt_timestamp)

def test_update_pages_score(self):
    config = Config()
    config.MAX_PAGE_SCORE = 15000000

    self.db.query(Page).delete()
    self.sync_cache.redis.delete('pages-score')

    page1 = PageFactory.create(score=3)
    page2 = PageFactory.create(score=0)

    for i in range(3):
        self.sync_cache.increment_page_score(page1.id)
    self.sync_cache.increment_page_score(page2.id)

    expect(page1.score).to_equal(3)
    expect(page2.score).to_equal(0)

    Page.update_pages_score(self.db, self.sync_cache, config)
    self.db.flush()

    self.db.refresh(page1)
    self.db.refresh(page2)

    expect(page1.score).to_equal(6)
    expect(page2.score).to_equal(1)

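# The increments accumulate only in the 'pages-score' cache: page1 receives
# +3 and page2 receives +1 while the database rows stay at 3 and 0. Only
# update_pages_score flushes the cached deltas back, yielding 3 + 3 = 6 and
# 0 + 1 = 1 after the refresh.
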
def test_can_get_domain_grouped_violations(self):
    domain1 = DomainFactory.create()
    domain2 = DomainFactory.create()

    page1 = PageFactory.create(domain=domain1)
    page2 = PageFactory.create(domain=domain1)
    page3 = PageFactory.create(domain=domain2)

    ReviewFactory.create(domain=domain1, page=page1, is_active=True, number_of_violations=5)
    ReviewFactory.create(domain=domain1, page=page2, is_active=True, number_of_violations=7)
    ReviewFactory.create(domain=domain2, page=page3, is_active=True, number_of_violations=9)

    self.server.application.violation_definitions = {
        'key.%s' % i: {
            'title': 'title.%s' % i,
            'category': 'category.%s' % (i % 3),
            'key': Key.get_or_create(self.db, 'key.%d' % i, 'category.%d' % (i % 3))
        } for i in xrange(9)
    }

    response = yield self.http_client.fetch(
        self.get_url('/domains/%s/violations' % domain1.name)
    )
    expect(response.code).to_equal(200)

    domain_violations = loads(response.body)

    expect(domain_violations).to_length(5)
    expect(domain_violations.keys()).to_be_like(
        ['domainName', 'violations', 'total', 'domainURL', 'domainId'])
    expect(domain_violations['total']).to_equal(12)
    expect(domain_violations['violations']).to_length(3)

    counts = map(lambda v: v['count'], domain_violations['violations'])
    expect(counts).to_be_like([5, 4, 3])

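# The grouped total covers only domain1's active reviews: 5 + 7 = 12, with
# domain2's 9 violations never appearing. The three groups line up with the
# three categories the definitions above cycle through via `i % 3`.
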
def test_can_get_domains_details(self):
    self.db.query(Domain).delete()

    details = Domain.get_domains_details(self.db)
    expect(details).to_length(0)

    domain = DomainFactory.create(name='domain-1.com', url='http://domain-1.com/')
    domain2 = DomainFactory.create(name='domain-2.com', url='http://domain-2.com/')
    DomainFactory.create()

    page = PageFactory.create(domain=domain)
    page2 = PageFactory.create(domain=domain)
    page3 = PageFactory.create(domain=domain2)

    ReviewFactory.create(domain=domain, page=page, is_active=True, number_of_violations=20)
    ReviewFactory.create(domain=domain, page=page2, is_active=True, number_of_violations=10)
    ReviewFactory.create(domain=domain2, page=page3, is_active=True, number_of_violations=30)

    RequestFactory.create(status_code=200, domain_name=domain.name, response_time=0.25)
    RequestFactory.create(status_code=304, domain_name=domain.name, response_time=0.35)
    RequestFactory.create(status_code=400, domain_name=domain.name, response_time=0.25)
    RequestFactory.create(status_code=403, domain_name=domain.name, response_time=0.35)
    RequestFactory.create(status_code=404, domain_name=domain.name, response_time=0.25)

    details = Domain.get_domains_details(self.db)

    expect(details).to_length(3)
    expect(details[0]).to_length(10)
    expect(details[0]['url']).to_equal('http://domain-1.com/')
    expect(details[0]['name']).to_equal('domain-1.com')
    expect(details[0]['violationCount']).to_equal(30)
    expect(details[0]['pageCount']).to_equal(2)
    expect(details[0]['reviewCount']).to_equal(2)
    expect(details[0]['reviewPercentage']).to_equal(100.0)
    expect(details[0]['errorPercentage']).to_equal(60.0)
    expect(details[0]['is_active']).to_be_true()
    expect(details[0]['averageResponseTime']).to_equal(0.3)

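# Decoding the request fixture: 3 of the 5 requests (400, 403, 404) carry
# error status codes, so errorPercentage = 3 / 5 = 60%. The expected 0.3
# average equals the mean of only the non-error requests, (0.25 + 0.35) / 2,
# which suggests averageResponseTime excludes failed requests (the mean over
# all five would be 0.29).
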
def test_can_get_page_by_uuid(self):
    page = PageFactory.create()
    PageFactory.create()

    loaded_page = Page.by_uuid(page.uuid, self.db)
    expect(loaded_page.id).to_equal(page.id)

    invalid_page = Page.by_uuid(uuid4(), self.db)
    expect(invalid_page).to_be_null()

def test_can_get_page_by_url_hash(self):
    page = PageFactory.create()
    PageFactory.create()

    loaded_page = Page.by_url_hash(page.url_hash, self.db)
    expect(loaded_page.id).to_equal(page.id)

    invalid_page = Page.by_url_hash('123', self.db)
    expect(invalid_page).to_be_null()

def test_get_next_job_does_not_get_from_inactive_domains(self):
    domain = DomainFactory.create(is_active=False)
    PageFactory.create(domain=domain)

    next_job = Page.get_next_job(
        self.db,
        expiration=100,
        cache=self.sync_cache,
        lock_expiration=1
    )

    expect(next_job).to_be_null()

def test_get_next_job_list(self):
    page = PageFactory.create()
    PageFactory.create()

    next_job_list = Page.get_next_job_list(self.db, expiration=100)
    expect(next_job_list).to_length(2)

    pages = [{'url': x.url, 'uuid': str(x.uuid)} for x in next_job_list]
    expect(pages).to_include({
        'url': page.url,
        'uuid': str(page.uuid)
    })

def test_can_increment_page_count(self):
    self.db.query(Domain).delete()

    globocom = DomainFactory.create(url="http://globo.com", name="globo.com")

    for i in range(2):
        PageFactory.create(domain=globocom)

    page_count = yield self.cache.get_page_count('globo.com')
    expect(page_count).to_equal(2)

    page_count = yield self.cache.increment_page_count('globo.com', 10)
    expect(page_count).to_equal(12)

def test_can_get_domain_top_category_violations(self):
    domain1 = DomainFactory.create()
    domain2 = DomainFactory.create()

    page1 = PageFactory.create(domain=domain1)
    page2 = PageFactory.create(domain=domain1)
    page3 = PageFactory.create(domain=domain2)

    ReviewFactory.create(domain=domain1, page=page1, is_active=True, number_of_violations=5)
    ReviewFactory.create(domain=domain1, page=page2, is_active=True, number_of_violations=7)
    ReviewFactory.create(domain=domain2, page=page3, is_active=True, number_of_violations=9)

    self.server.application.violation_definitions = {
        'key.%s' % i: {
            'title': 'title.%s' % i,
            'category': 'category.%s' % (i % 3),
            'key': Key.get_or_create(self.db, 'key.%d' % i, 'category.%d' % (i % 3))
        } for i in range(9)
    }

    key = Key.get_by_name(self.db, 'key.0')

    response = yield self.authenticated_fetch(
        '/domains/%s/violations/%d/' % (domain1.name, key.category_id))
    expect(response.code).to_equal(200)

    domain_top_category = loads(response.body)

    expect(domain_top_category).to_length(5)
    expect(domain_top_category.keys()).to_be_like([
        'violations', 'domainId', 'categoryId', 'domainURL', 'domainName'
    ])
    expect(domain_top_category['violations']).to_length(3)

    counts = map(lambda v: v['count'], domain_top_category['violations'])
    expect(counts).to_be_like([2, 1, 1])

def test_increment_page_count(self):
    key = 'g.com-page-count'
    self.sync_cache.redis.delete(key)

    gcom = DomainFactory.create(url="http://g.com", name="g.com")
    PageFactory.create(domain=gcom)

    self.sync_cache.increment_page_count(gcom.name)
    page_count = self.sync_cache.redis.get(key)
    expect(page_count).to_equal('1')

    self.sync_cache.increment_page_count(gcom.name)
    page_count = self.sync_cache.redis.get(key)
    expect(page_count).to_equal('2')

def test_can_get_pages_per_domain(self):
    domain = DomainFactory.create()
    domain2 = DomainFactory.create()
    DomainFactory.create()

    PageFactory.create(domain=domain)
    PageFactory.create(domain=domain)
    PageFactory.create(domain=domain2)
    PageFactory.create(domain=domain2)
    PageFactory.create(domain=domain2)

    pages_per_domain = Domain.get_pages_per_domain(self.db)

    expect(pages_per_domain).to_be_like({
        domain.id: 2,
        domain2.id: 3
    })

def test_can_get_last_reviews_with_domain_filter(self):
    dt1 = datetime(2010, 10, 10, 10, 10, 10)
    dt2 = datetime(2010, 10, 11, 10, 10, 10)
    dt3 = datetime(2010, 10, 12, 10, 10, 10)

    domain1 = DomainFactory.create()
    domain2 = DomainFactory.create()

    page1 = PageFactory.create(domain=domain1)
    page2 = PageFactory.create(domain=domain2)

    ReviewFactory.create(is_active=True, is_complete=True, page=page1, completed_date=dt1)
    ReviewFactory.create(is_active=True, is_complete=True, page=page1, completed_date=dt2)
    ReviewFactory.create(is_active=True, is_complete=True, page=page1, completed_date=dt3)
    ReviewFactory.create(is_active=True, is_complete=True, page=page2, completed_date=dt1)
    ReviewFactory.create(is_active=True, is_complete=True, page=page2, completed_date=dt2)
    ReviewFactory.create(is_active=True, is_complete=True, page=page2, completed_date=dt3)

    response = yield self.authenticated_fetch(
        '/last-reviews?domain_filter=%s' % domain1.name)
    expect(response.code).to_equal(200)
    expect(len(loads(response.body))).to_be_like(3)
    expect(all([x['domain'] == domain1.name for x in loads(response.body)])).to_be_true()

    response = yield self.authenticated_fetch('/last-reviews')
    expect(response.code).to_equal(200)
    expect(len(loads(response.body))).to_be_like(6)

def test_can_get_next_jobs_count(self):
    config = Config()
    config.REVIEW_EXPIRATION_IN_SECONDS = 100

    for x in range(3):
        PageFactory.create()

    next_jobs_count = Page.get_next_jobs_count(self.db, config)
    expect(next_jobs_count).to_equal(3)

    for x in range(2):
        PageFactory.create()

    next_jobs_count = Page.get_next_jobs_count(self.db, config)
    expect(next_jobs_count).to_equal(5)

def test_can_get_default_violations_values(self):
    config = Config()
    config.REQUIRED_META_TAGS = ['description']

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    validator = RequiredMetaTagsValidator(reviewer)

    violations_values = validator.get_default_violations_values(config)

    expect(violations_values).to_include('absent.meta.tags')
    expect(violations_values['absent.meta.tags']).to_length(2)
    expect(violations_values['absent.meta.tags']).to_be_like({
        'value': config.REQUIRED_META_TAGS,
        'description': config.get_description('REQUIRED_META_TAGS')
    })

def test_add_violation_when_404(self):
    page = PageFactory.create(url='http://globo.com')
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[]
    )

    validator = SitemapValidator(reviewer)
    validator.review.data['sitemap.files.size'] = {
        'http://g1.globo.com/sitemap.xml': 10
    }
    validator.review.data['sitemap.data'] = {
        'http://g1.globo.com/sitemap.xml': Mock(status_code=404, text=None)
    }
    validator.add_violation = Mock()

    validator.validate()

    validator.add_violation.assert_called_once_with(
        key='sitemap.not_found',
        value='http://g1.globo.com/sitemap.xml',
        points=100
    )

def test_add_violation_when_sitemap_is_too_large(self):
    page = PageFactory.create(url='http://globo.com')
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[]
    )

    validator = SitemapValidator(reviewer)
    validator.review.data['sitemap.files.size'] = {
        'http://g1.globo.com/sitemap.xml': 10241
    }
    validator.review.data['sitemap.data'] = {
        'http://g1.globo.com/sitemap.xml': Mock(status_code=200, text='data')
    }
    validator.review.data['sitemap.files.urls'] = {
        'http://g1.globo.com/sitemap.xml': 10
    }
    validator.review.data['sitemap.urls'] = {
        'http://g1.globo.com/sitemap.xml': []
    }
    validator.add_violation = Mock()

    validator.validate()

    validator.add_violation.assert_called_once_with(
        key='total.size.sitemap',
        value={
            'url': 'http://g1.globo.com/sitemap.xml',
            'size': 10.0009765625
        },
        points=10
    )

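# The reported size is the raw 'sitemap.files.size' value divided by 1024:
# 10241 / 1024 = 10.0009765625. That puts the file just over what appears to
# be a 10-unit ceiling (the exact unit and limit live in the validator, not
# this test), which is what trips the violation.
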
def test_add_violation_when_sitemap_with_good_link(self):
    page = PageFactory.create(url='http://globo.com')
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[]
    )

    validator = SitemapValidator(reviewer)
    validator.review.data['sitemap.files.size'] = {
        'http://g1.globo.com/sitemap.xml': 10
    }
    validator.review.data['sitemap.data'] = {
        'http://g1.globo.com/sitemap.xml': Mock(
            status_code=200,
            text='data',
            url='http://g1.globo.com/%C3%BCmlat.php&q=name'
        )
    }
    validator.review.data['sitemap.files.urls'] = {
        'http://g1.globo.com/sitemap.xml': 20
    }
    validator.review.data['sitemap.urls'] = {
        'http://g1.globo.com/sitemap.xml': ['http://g1.globo.com/%C3%BCmlat.php&q=name']
    }
    validator.add_violation = Mock()
    validator.flush = Mock()

    validator.validate()

    expect(validator.add_violation.call_count).to_equal(0)
    expect(validator.flush.call_count).to_equal(1)

def test_can_get_default_violations_values(self):
    config = Config()
    config.FORCE_CANONICAL = False

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    validator = LinkWithRelCanonicalValidator(reviewer)

    violations_values = validator.get_default_violations_values(config)

    expect(violations_values).to_include('absent.meta.canonical')
    expect(violations_values['absent.meta.canonical']).to_length(2)
    expect(violations_values['absent.meta.canonical']).to_be_like({
        'value': config.FORCE_CANONICAL,
        'description': config.get_description('FORCE_CANONICAL')
    })

def test_can_get_page_count(self):
    domain = DomainFactory.create()
    domain2 = DomainFactory.create()
    DomainFactory.create()

    PageFactory.create(domain=domain)
    PageFactory.create(domain=domain)
    PageFactory.create(domain=domain2)
    PageFactory.create(domain=domain2)
    PageFactory.create(domain=domain2)

    pages_for_domain = domain.get_page_count(self.db)
    pages_for_domain_2 = domain2.get_page_count(self.db)

    expect(pages_for_domain).to_equal(2)
    expect(pages_for_domain_2).to_equal(3)

def test_can_load_url_with_empty_headers(self):
    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        facters=[]
    )

    content = '<html></html>'
    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content),
    }
    reviewer.responses[page.url] = result

    reviewer._wait_for_async_requests = Mock()
    reviewer.save_review = Mock()
    response = Mock(status_code=200, text=content, headers={})
    reviewer.content_loaded(page.url, response)

    facter = LastModifiedFacter(reviewer)
    facter.add_fact = Mock()

    facter.get_facts()

    expect(facter.review.data).to_length(0)
    expect(facter.review.data).to_be_like({})
    expect(facter.add_fact.called).to_be_false()

def test_can_validate_css_requests_on_globo_html(self):
    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[],
        cache=self.sync_cache
    )
    reviewer.violation_definitions = {
        'total.requests.css': {'default_value': 1},
        'total.size.css': {'default_value': 0.0},
    }

    content = self.get_file('globo.html')
    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }
    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    validator = CSSRequestsValidator(reviewer)
    css = {
        'url': 'some_style.css',
        'status': 200,
        'content': '#id{display:none}',
        'html': None
    }
    validator.get_response = Mock(return_value=css)
    validator.add_violation = Mock()
    validator.review.data = {
        'total.requests.css': 7,
        'total.size.css.gzipped': 0.05
    }

    validator.validate()

    expect(validator.add_violation.call_args_list).to_include(
        call(
            key='total.requests.css',
            value={'over_limit': 6, 'total_css_files': 7},
            points=30
        ))
    expect(validator.add_violation.call_args_list).to_include(
        call(
            key='total.size.css',
            value=0.05,
            points=0
        ))

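# over_limit is the excess beyond the configured allowance: 7 CSS requests
# minus the default_value of 1 leaves 6 over the limit. The points=30 figure
# is consistent with 5 points per extra file, though the actual scoring rule
# lives in CSSRequestsValidator, not in this test.
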
def test_can_get_default_violations_values(self):
    config = Config()
    config.SCHEMA_ORG_ITEMTYPE = [
        'http://schema.org/WebPage',
        'http://schema.org/AboutPage',
    ]

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    validator = SchemaOrgItemTypeValidator(reviewer)

    violations_values = validator.get_default_violations_values(config)

    expect(violations_values).to_include('invalid.schema.itemtype')
    expect(violations_values['invalid.schema.itemtype']).to_length(2)
    expect(violations_values['invalid.schema.itemtype']).to_equal({
        'value': config.SCHEMA_ORG_ITEMTYPE,
        'description': config.get_description('SCHEMA_ORG_ITEMTYPE')
    })

def test_can_get_violations_per_day(self):
    dt = datetime(2013, 10, 10, 10, 10, 10)
    dt2 = datetime(2013, 10, 11, 10, 10, 10)
    dt3 = datetime(2013, 10, 12, 10, 10, 10)

    domain = DomainFactory.create()
    page = PageFactory.create(domain=domain)

    ReviewFactory.create(domain=domain, page=page, is_active=False, is_complete=True, completed_date=dt, number_of_violations=20)
    ReviewFactory.create(domain=domain, page=page, is_active=False, is_complete=True, completed_date=dt2, number_of_violations=10)
    ReviewFactory.create(domain=domain, page=page, is_active=True, is_complete=True, completed_date=dt3, number_of_violations=30)

    violations = domain.get_violations_per_day(self.db)

    expect(violations).to_be_like([
        {
            "completedAt": "2013-10-10",
            "violation_count": 20,
            "violation_points": 190
        },
        {
            "completedAt": "2013-10-11",
            "violation_count": 10,
            "violation_points": 45
        },
        {
            "completedAt": "2013-10-12",
            "violation_count": 30,
            "violation_points": 435
        }
    ])

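# The points expectations follow an arithmetic series, which suggests the
# factory assigns each review's violations points 0..n-1:
# sum(range(20)) == 190, sum(range(10)) == 45, sum(range(30)) == 435.
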
def test_no_body_tag(self):
    config = Config()

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    content = '<html></html>'
    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }
    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    validator = SchemaOrgItemTypeValidator(reviewer)
    validator.add_violation = Mock()

    validator.validate()

    expect(validator.add_violation.called).to_be_false()

def test_can_search(self):
    dt = datetime.now()

    page = PageFactory.create(url="http://www.mypage.something.com")
    review1 = ReviewFactory.create(
        page=page, is_active=True, is_complete=True,
        completed_date=dt, number_of_violations=20
    )
    self.db.flush()

    page.last_review = review1
    page.last_review_date = dt
    self.db.flush()

    response = yield self.authenticated_fetch(
        '/search?term=http://www.mypage.something.com')
    expect(response.code).to_equal(200)

    obj = loads(response.body)
    expect(obj).to_be_like({
        u'url': u'http://www.mypage.something.com',
        u'reviewId': str(review1.uuid),
        u'uuid': str(page.uuid),
        u'domain': 'mypage.something.com'
    })

def test_no_title_tag(self):
    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        facters=[]
    )

    content = '<html></html>'
    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }
    reviewer.responses[page.url] = result
    reviewer._wait_for_async_requests = Mock()
    reviewer.save_review = Mock()
    reviewer.content_loaded(page.url, Mock(status_code=200, text=content, headers={}))

    facter = TitleFacter(reviewer)
    facter.add_fact = Mock()

    facter.get_facts()

    expect(facter.add_fact.called).to_be_false()
    expect(facter.review.data).to_be_like({})

def test_page_without_google_analytics_domain(self):
    config = Config()

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    validator = GoogleAnalyticsValidator(reviewer)
    validator.add_violation = Mock()
    validator.review.data = {
        'page.google_analytics': set([('UA-296593-2', None)]),
    }

    validator.validate()

    expect(validator.add_violation.call_args_list).to_include(
        call(
            key='google_analytics.domain.not_found',
            points=50,
            value=None
        ))

def test_can_get_fact_definitions(self):
    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        facters=[]
    )

    content = self.get_file('globo.html')
    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }
    reviewer.responses[page.url] = result
    reviewer._wait_for_async_requests = Mock()
    reviewer.save_review = Mock()
    response = Mock(status_code=200, text=content, headers={})
    reviewer.content_loaded(page.url, response)

    facter = TitleFacter(reviewer)

    definitions = facter.get_fact_definitions()

    expect(definitions).to_length(1)
    expect('page.title' in definitions).to_be_true()

def test_validate(self):
    config = Config()

    page = PageFactory.create(url='http://globo.com/1?item=test')
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    content = '<html><head></head></html>'
    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }
    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    validator = LinkWithRelCanonicalValidator(reviewer)
    validator.add_violation = Mock()
    validator.review.data = {'page.head': [{}]}

    validator.validate()

    expect(validator.add_violation.call_args_list).to_include(
        call(key='absent.meta.canonical', value=None, points=30))

def test_can_get_violations_per_day(self):
    dt = datetime(2013, 10, 10, 10, 10, 10)
    dt2 = datetime(2013, 10, 11, 10, 10, 10)
    dt3 = datetime(2013, 10, 12, 10, 10, 10)

    page = PageFactory.create()

    ReviewFactory.create(page=page, is_active=False, is_complete=True, completed_date=dt, number_of_violations=20)
    ReviewFactory.create(page=page, is_active=False, is_complete=True, completed_date=dt2, number_of_violations=10)
    ReviewFactory.create(page=page, is_active=True, is_complete=True, completed_date=dt3, number_of_violations=30)

    response = yield self.authenticated_fetch(
        '/domains/%s/violations-per-day/' % page.domain.name)
    expect(response.code).to_equal(200)

    domain_details = loads(response.body)

    expect(domain_details['violations']).to_be_like([
        {
            u'completedAt': u'2013-10-10',
            u'violation_points': 190,
            u'violation_count': 20
        },
        {
            u'completedAt': u'2013-10-11',
            u'violation_points': 45,
            u'violation_count': 10
        },
        {
            u'completedAt': u'2013-10-12',
            u'violation_points': 435,
            u'violation_count': 30
        }
    ])

def test_handle_sitemap_url_loaded(self):
    page = PageFactory.create(url="http://g1.globo.com/")
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[]
    )
    reviewer.enqueue = Mock()

    content = self.get_file('url_sitemap.xml')
    response = Mock(status_code=200, text=content)

    facter = SitemapFacter(reviewer)
    facter.async_get = Mock()
    facter.get_facts()

    facter.handle_sitemap_loaded("http://g1.globo.com/sitemap.xml", response)

    expect(facter.review.data['sitemap.files.size']["http://g1.globo.com/sitemap.xml"]).to_equal(0.296875)
    expect(facter.review.data['sitemap.urls']["http://g1.globo.com/sitemap.xml"]).to_equal(
        set(['http://domain.com/1.html', 'http://domain.com/2.html']))
    expect(facter.review.facts['total.size.sitemap']['value']).to_equal(0.296875)
    expect(facter.review.facts['total.size.sitemap.gzipped']['value']).to_equal(0.1494140625)
    expect(facter.review.data['total.size.sitemap']).to_equal(0.296875)
    expect(facter.review.data['total.size.sitemap.gzipped']).to_equal(0.1494140625)
    expect(facter.review.data['sitemap.files.urls']["http://g1.globo.com/sitemap.xml"]).to_equal(2)
    expect(facter.review.facts['total.sitemap.urls']['value']).to_equal(2)

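# The size facts are consistent with kilobyte values derived from the fixture:
# 0.296875 KB is 304 bytes of raw XML and 0.1494140625 KB is 153 bytes after
# gzip — assuming url_sitemap.xml has exactly those sizes on disk.
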
def test_can_get_reviews_for_domain(self):
    dt = datetime(2013, 10, 10, 10, 10, 10)
    dt2 = datetime(2013, 10, 11, 10, 10, 10)
    dt3 = datetime(2013, 10, 12, 10, 10, 10)

    domain = DomainFactory.create()
    page = PageFactory.create(domain=domain, last_review_date=dt3)

    ReviewFactory.create(page=page, is_active=False, is_complete=True, completed_date=dt, number_of_violations=20)
    ReviewFactory.create(page=page, is_active=False, is_complete=True, completed_date=dt2, number_of_violations=10)
    review = ReviewFactory.create(page=page, is_active=True, is_complete=True, completed_date=dt3, number_of_violations=30)

    reviews = domain.get_active_reviews(self.db)

    expect(reviews).to_length(1)
    expect(reviews[0].last_review_uuid).to_equal(str(review.uuid))

def test_can_validate_last_modified(self):
    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[]
    )

    validator = LastModifiedValidator(reviewer)
    validator.add_violation = Mock()
    validator.review.data = {
        'page.last_modified': None
    }
    validator.review.facts = {
        'page.last_modified': None
    }

    validator.validate()

    validator.add_violation.assert_called_once_with(
        key='page.last_modified.not_found',
        value=page.url,
        points=50
    )

def test_can_validate_with_headers(self):
    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[]
    )

    validator = LastModifiedValidator(reviewer)
    validator.add_violation = Mock()
    validator.review.data = {
        'page.last_modified': datetime.datetime(2014, 1, 13, 1, 16, 10)
    }
    validator.review.facts = {
        'page.last_modified': datetime.datetime(2014, 1, 13, 1, 16, 10)
    }

    validator.validate()

    expect(validator.add_violation.called).to_be_false()

def test_add_violation_when_sitemap_has_links_that_do_not_need_to_be_encoded(self):
    page = PageFactory.create(url='http://globo.com')
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[]
    )

    validator = SitemapValidator(reviewer)
    validator.review.data['sitemap.files.size'] = {
        'http://g1.globo.com/sitemap.xml': 10
    }
    validator.review.data['sitemap.data'] = {
        'http://g1.globo.com/sitemap.xml': Mock(status_code=200, text='data')
    }
    validator.review.data['sitemap.files.urls'] = {
        'http://g1.globo.com/sitemap.xml': 20
    }
    validator.review.data['sitemap.urls'] = {
        'http://g1.globo.com/sitemap.xml': ['http://g1.globo.com/1.html']
    }
    validator.add_violation = Mock()

    validator.validate()

    expect(validator.add_violation.call_count).to_equal(0)

def test_add_violation_when_sitemap_has_links_that_need_to_be_encoded_with_amp(self):
    page = PageFactory.create(url='http://globo.com')
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[]
    )

    validator = SitemapValidator(reviewer)
    validator.review.data['sitemap.files.size'] = {
        'http://g1.globo.com/sitemap.xml': 10
    }
    validator.review.data['sitemap.data'] = {
        'http://g1.globo.com/sitemap.xml': Mock(status_code=200, text='data')
    }
    validator.review.data['sitemap.files.urls'] = {
        'http://g1.globo.com/sitemap.xml': 20
    }
    validator.review.data['sitemap.urls'] = {
        'http://g1.globo.com/sitemap.xml': ['http://g1.globo.com/%C3%BCmlat.php&q=name']
    }
    validator.add_violation = Mock()

    validator.validate()

    validator.add_violation.assert_called_once_with(
        key='sitemap.links.not_encoded',
        value={
            'url': 'http://g1.globo.com/sitemap.xml',
            'links': 1
        },
        points=10
    )

def test_can_get_domains_full_data(self):
    domains = []
    for i in range(3):
        domains.append(DomainFactory.create(name='domain-%d.com' % i))

    pages = []
    for i, domain in enumerate(domains):
        pages.append([])
        for j in range(3):
            pages[i].append(PageFactory.create(domain=domain))

    reviews = []
    requests = []
    for i, (domain, page) in enumerate(zip(domains, pages)):
        for j in range(i + 1):
            reviews.append(
                ReviewFactory.create(domain=domain, page=page[j], is_active=True,
                                     number_of_violations=(5 + 2 * j)))
            requests.append(
                RequestFactory.create(status_code=200 if j % 2 == 0 else 404,
                                      domain_name=domain.name,
                                      response_time=0.25 * (i + 1)))

    self.server.application.violation_definitions = {
        'key.%s' % i: {
            'title': 'title.%s' % i,
            'category': 'category.%s' % (i % 3),
            'key': Key.get_or_create(self.db, 'key.%d' % i, 'category.%d' % (i % 3))
        } for i in range(9)
    }

    response = yield self.authenticated_fetch('/domains-details')
    expect(response.code).to_equal(200)

    full_data = loads(response.body)

    expect(full_data).to_length(3)
    expect(full_data[0].keys()).to_length(10)

    expect(map(lambda d: d['name'], full_data)).to_be_like(
        ['domain-0.com', 'domain-1.com', 'domain-2.com'])
    expect(map(lambda d: d['pageCount'], full_data)).to_be_like([3, 3, 3])
    expect(map(lambda d: d['reviewCount'], full_data)).to_be_like([1, 2, 3])
    expect(map(lambda d: d['violationCount'], full_data)).to_be_like([5, 12, 21])
    expect(map(lambda d: d['reviewPercentage'], full_data)).to_be_like([33.33, 66.67, 100.0])
    expect(map(lambda d: d['errorPercentage'], full_data)).to_be_like([0.0, 50.0, 33.33])
    expect(map(lambda d: d['averageResponseTime'], full_data)).to_be_like([0.25, 0.5, 0.75])

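# How the fixture above maps to the expectations, per domain i (i + 1 reviews):
#   domain-0: 1 review,  violations 5;              requests [200]            -> 0% errors
#   domain-1: 2 reviews, violations 5 + 7 = 12;     requests [200, 404]       -> 50% errors
#   domain-2: 3 reviews, violations 5 + 7 + 9 = 21; requests [200, 404, 200]  -> 33.33% errors
# reviewPercentage is reviewed pages over the 3 pages per domain
# (33.33 / 66.67 / 100.0), and averageResponseTime is the constant
# 0.25 * (i + 1) assigned to every request of domain i.
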
def test_can_validate_no_title_tag(self):
    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[]
    )

    content = '<html></html>'
    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }
    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    validator = TitleValidator(reviewer)
    validator.add_violation = Mock()

    validator.validate()

    validator.add_violation.assert_called_once_with(
        key='page.title.not_found',
        value=page.url,
        points=50
    )

def test_get_robots_from_root_domain(self):
    page = PageFactory.create(url="http://www.globo.com")
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=Config(),
        validators=[]
    )

    facter = RobotsFacter(reviewer)
    facter.async_get = Mock()
    facter.add_fact = Mock()
    facter.get_facts()

    robots_url = 'http://www.globo.com/robots.txt'

    expect(facter.review.data).to_length(1)
    expect(facter.review.data['robots.response']).to_equal(None)

    facter.async_get.assert_called_once_with(robots_url, facter.handle_robots_loaded)

    response = Mock(status_code=200, text='', headers={})
    facter.handle_robots_loaded(robots_url, response)
    expect(facter.review.data['robots.response']).to_equal(response)

    expect(facter.add_fact.call_args_list).to_include(
        call(
            key='robots.url',
            value=robots_url,
        ))

def test_query_string_without_params(self):
    config = Config()
    config.FORCE_CANONICAL = False

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    content = '<html><head></head></html>'
    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }
    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    validator = LinkWithRelCanonicalValidator(reviewer)
    validator.add_violation = Mock()
    validator.review.data = {'page.head': [{}]}

    validator.validate()

    expect(validator.add_violation.called).to_be_false()

def test_can_validate_without_meta_tags(self):
    config = Config()

    page = PageFactory.create()
    reviewer = Reviewer(
        api_url='http://localhost:2368',
        page_uuid=page.uuid,
        page_url=page.url,
        page_score=0.0,
        config=config,
        validators=[]
    )

    content = '<html></html>'
    result = {
        'url': page.url,
        'status': 200,
        'content': content,
        'html': lxml.html.fromstring(content)
    }
    reviewer.responses[page.url] = result
    reviewer.get_response = Mock(return_value=result)

    validator = OpenGraphValidator(reviewer)
    validator.add_violation = Mock()

    validator.validate()

    expect(validator.add_violation.called).to_be_false()