def test_init_sitemap_negative(self, sitemap_content, mock_requests):
    # A 404 on the sitemap URL should not stop Spider.__init__ from
    # queueing the base site URL for crawling.
    sitemap_url = "/sitemap.xml"
    mock_requests.return_value.status_code = requests.codes.not_found
    mock_requests.return_value.content = sitemap_content
    web_page = website_analysis.Spider(self.site_url,
                                       self.site_url + sitemap_url)
    self.assertIn(self.site_url, web_page.pages_to_crawl)
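# These test methods are excerpts from a larger unittest.TestCase class.
# A minimal sketch of the scaffolding they appear to assume follows; the
# patch target ("requests.get"), the value of self.site_url, and the class
# name are assumptions, not taken from the original source.
import unittest
from unittest import mock

import requests

import website_analysis


class SpiderTest(unittest.TestCase):  # hypothetical class name

    def setUp(self):
        self.site_url = "http://example.com"  # assumed fixture value

    @mock.patch("requests.get")  # assumed patch target for mock_requests
    def test_scaffolding_example(self, mock_requests):
        mock_requests.return_value.status_code = requests.codes.ok
        web_page = website_analysis.Spider(self.site_url, None)
        self.assertIn(self.site_url, web_page.pages_to_crawl)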
Example #2
import json

import website_analysis


def analyze(domain, sitemap, page):
    """Analyze the domain, sitemap, and page passed by the user.

    Args:
        domain: Uniform Resource Locator of the web application.
        sitemap: An XML sitemap for the web application.
        page: Uniform Resource Locator of a single web page.

    Returns:
        report: JSON document listing all achievements and warnings.
    """
    spider = website_analysis.Spider(domain, sitemap, page)
    raw_report = spider.crawl()
    report = json.dumps(raw_report, indent=4, separators=(",", ": "))
    return report
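# A hedged usage sketch for analyze(); the URLs are placeholder values,
# and website_analysis must be importable for this to run.
if __name__ == "__main__":
    print(analyze("http://example.com",
                  "http://example.com/sitemap.xml",
                  None))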
def test_analyze_blog(self, resp_code, mock_requests):
    # A reachable blog should earn the BLOG_DETECTED badge; any other
    # response should raise the BLOG_MISSING warning.
    mock_requests.return_value.status_code = int(resp_code)
    web_page = website_analysis.Spider(self.site_url, None)
    web_page._analyze_blog()
    if int(resp_code) == requests.codes.ok:
        self.assertTrue(
            any(earned["achievement"] == BADGES["BLOG_DETECTED"]
                for earned in web_page.achieved),
            "{0} not earned.".format(BADGES["BLOG_DETECTED"]))
    else:
        self.assertTrue(
            any(issue["warning"] == WARNINGS["BLOG_MISSING"]
                for issue in web_page.issues),
            "{0} not raised.".format(WARNINGS["BLOG_MISSING"]))
def test_crawl(self, data, mock_requests):
    # crawl() should leave issues empty on 200, flag BROKEN_LINK on 404,
    # and flag SERVER_ERROR on any other status code.
    web_page = website_analysis.Spider(self.site_url, None)
    web_page._analyze_crawlers = mock.MagicMock(name="_analyze_crawlers")
    resp_code, content = data.split("|")
    mock_requests.return_value.status_code = int(resp_code)
    mock_requests.return_value.content = content
    web_page.crawl()
    if int(resp_code) == requests.codes.ok:
        self.assertEqual(len(web_page.issues), 0)
    elif int(resp_code) == requests.codes.not_found:
        self.assertTrue(
            any(issue["warning"] == WARNINGS["BROKEN_LINK"]
                for issue in web_page.issues),
            "{0} not raised.".format(WARNINGS["BROKEN_LINK"]))
    else:
        self.assertTrue(
            any(issue["warning"] == WARNINGS["SERVER_ERROR"]
                for issue in web_page.issues),
            "{0} not raised.".format(WARNINGS["SERVER_ERROR"]))
def test_parse_sitemap(self, sitemap_content):
    # _parse_sitemap should return one location per <url> entry in the
    # sitemap document.
    web_page = website_analysis.Spider(self.site_url, None)
    locations = web_page._parse_sitemap(sitemap_content)
    soup = Soup(sitemap_content, "html.parser")
    urls = soup.find_all("url")
    self.assertEqual(len(locations), len(urls))
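# A minimal sitemap fixture of the kind test_parse_sitemap consumes; the
# actual fixture used by the suite is not shown, so the URLs here are
# assumptions.
SITEMAP_CONTENT = """<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  <url><loc>http://example.com/</loc></url>
  <url><loc>http://example.com/blog/</loc></url>
</urlset>"""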
def test_init_url(self):
    # With no sitemap, only the base site URL should be queued.
    web_page = website_analysis.Spider(self.site_url, None)
    self.assertEqual(len(web_page.pages_to_crawl), 1)
    self.assertEqual(web_page.pages_to_crawl[0], self.site_url)