def test_analyze_positive(self, data):
    """Run the analysis on a fixture page and require that it does not raise.

    ``data[0]`` is the HTML handed to ``Webpage``. Nothing is asserted about
    the report itself; the test passes as long as ``report()`` completes.
    """
    markup = data[0]
    page = webpage_analysis.Webpage(
        "https://harshcasper.github.io",
        markup,
        self.titles,
        self.descriptions,
    )
    self.wp = page
    page.report()
def crawl(self, timeout=10):
    """Crawl the website and analyze every queued page.

    Calls ``self._analyze_crawlers()`` and ``self._analyze_blog()``, then
    fetches each URL in ``self.pages_to_crawl``:

    * HTTP 200: the page is analyzed with ``webpage_analysis.Webpage``, its
      report is appended to ``self.report["pages"]`` and the normalized URL
      is recorded in ``self.pages_crawled``.
    * HTTP 404: a ``BROKEN_LINK`` warning is recorded.
    * Any other status: a ``SERVER_ERROR`` warning is recorded.
    * Transport failure (timeout, connection error, invalid URL, ...): a
      ``SERVER_ERROR`` warning naming the exception is recorded and the
      crawl moves on to the next page instead of aborting.

    Args:
        timeout: Value passed to ``requests.get`` as ``timeout`` (seconds).
            Without it a single unresponsive host would block the crawl
            indefinitely.

    Returns:
        ``self.report``, with ``self.issues`` and ``self.achieved`` stored
        under ``self.report["site"]``.
    """
    self._analyze_crawlers()
    self._analyze_blog()

    for page_url in self.pages_to_crawl:
        try:
            resp = requests.get(page_url, timeout=timeout)
        except requests.RequestException as exc:
            # One unreachable page must not discard the whole report.
            # NOTE(review): SERVER_ERROR is the closest warning key visible
            # here; switch to a dedicated key if WARNINGS defines one.
            self.warn(
                WARNINGS["SERVER_ERROR"],
                "{0} raised for {1}".format(type(exc).__name__, page_url),
            )
            continue

        if resp.status_code == requests.codes.ok:
            page = webpage_analysis.Webpage(
                page_url, resp.content, self.titles, self.descriptions)
            self.report["pages"].append(page.report())
            self.pages_crawled.append(page_url.strip().lower())
        elif resp.status_code == requests.codes.not_found:
            self.warn(WARNINGS["BROKEN_LINK"], page_url)
        else:
            self.warn(
                WARNINGS["SERVER_ERROR"],
                "HTTP{0} received for {1}".format(resp.status_code, page_url),
            )

    self.report["site"] = {
        "issues": self.issues,
        "achieved": self.achieved,
    }
    return self.report
def test_visible_tags(self, data):
    """Check ``Webpage.visible_tags`` against every text node of a fixture.

    ``data[0]`` is passed to ``self.soup_file`` to obtain the parsed
    document; ``data[1]`` is the value ``visible_tags`` must return for each
    text node found in it.
    """
    self.wp = webpage_analysis.Webpage(
        "https://harshcasper.github.io", "", self.titles, self.descriptions)

    soup = self.soup_file(data[0])
    # find_all(string=True) is the current Beautiful Soup spelling of the
    # deprecated findAll(text=True); both select the document's text nodes.
    for text_node in soup.find_all(string=True):
        self.assertEqual(self.wp.visible_tags(text_node), data[1])
def test_analyze_negative_url(self, data):
    """Require that analyzing a URL fixture records the expected warning.

    ``data[0]`` is the URL to analyze (with empty HTML) and ``data[1]`` is
    the ``WARNINGS`` key whose message must appear among the page's issues.
    """
    target_url, warning_key = data[0], data[1]
    page = webpage_analysis.Webpage(
        target_url, "", self.titles, self.descriptions)
    self.wp = page
    page.report()

    raised = False
    for issue in page.issues:
        if issue["warning"] == WARNINGS[warning_key]:
            raised = True
            break

    self.assertTrue(
        raised,
        "{0} not raised.".format(WARNINGS[warning_key]),
    )
def test_analyze_negative(self, data):
    """Require that analyzing an HTML fixture records the expected warning.

    ``data[0]`` is the HTML to analyze and ``data[1]`` is the ``WARNINGS``
    key whose message must appear among the page's issues.
    """
    markup, warning_key = data[0], data[1]
    page = webpage_analysis.Webpage(
        "https://harshcasper.github.io",
        markup,
        self.titles,
        self.descriptions,
    )
    self.wp = page
    page.report()

    expected_warning = WARNINGS[warning_key]
    was_raised = any(
        issue["warning"] == expected_warning for issue in page.issues
    )
    self.assertTrue(was_raised, "{0} not raised.".format(expected_warning))
def test_analyze_positive_url(self, data):
    """Require that analyzing a URL fixture earns the expected badge, if any.

    ``data[0]`` is the URL to analyze (with empty HTML) and ``data[1]`` is
    the ``BADGES`` key that must appear among the page's achievements. An
    empty string for ``data[1]`` means no badge is checked.
    """
    target_url, badge_key = data[0], data[1]
    page = webpage_analysis.Webpage(
        target_url, "", self.titles, self.descriptions)
    self.wp = page
    page.report()

    if badge_key == "":
        # Nothing to verify beyond report() having completed.
        return

    was_earned = any(
        earned["achievement"] == BADGES[badge_key]
        for earned in page.achieved
    )
    self.assertTrue(was_earned, "{0} not earned".format(BADGES[badge_key]))
def test_analyze_duplicates_negative(self, page):
    """Analyze the same HTML under two URLs and require the expected warning.

    ``page[0]`` is the HTML and ``page[1]`` is the ``WARNINGS`` key that must
    appear in the issues of at least one of the two page reports. Both
    ``Webpage`` instances receive the same ``self.titles`` and
    ``self.descriptions`` objects (presumably how duplicates are detected;
    confirm against ``Webpage``).
    """
    markup, warning_key = page[0], page[1]

    page_reports = []
    for index in range(2):
        self.wp = webpage_analysis.Webpage(
            "https://harshcasper.github.io/page{0}.html".format(index),
            markup,
            self.titles,
            self.descriptions,
        )
        page_reports.append(self.wp.report())

    was_raised = any(
        any(
            issue["warning"] == WARNINGS[warning_key]
            for issue in page_report["issues"]
        )
        for page_report in page_reports
    )
    self.assertTrue(
        was_raised,
        "{0} not raised. {1} {2}".format(
            WARNINGS[warning_key], self.titles, self.descriptions),
    )