def test_no_links(self):
    """Crawl the server seeded with NO_LINKS_HTML; expect one root entry with no assets."""
    import json  # local import: the file's import block is outside this block

    self.start_server(TestCrawler.NO_LINKS_HTML)
    crawler = Crawler(TestCrawler.SERVER)
    crawler.crawl()

    expected = [
        {"assets": [], "url": "http://127.0.0.1:5000/"},
    ]
    # Compare the decoded structure rather than the raw text so the check
    # does not depend on the serializer's whitespace (for example, whether
    # a space is emitted after a comma that precedes a newline).
    self.assertEqual(expected, json.loads(crawler.assets_json()))
def test_image_links(self):
    """Crawl the server seeded with IMAGE_LINKS_HTML; expect funny.gif as the root's only asset."""
    import json  # local import: the file's import block is outside this block

    self.start_server(TestCrawler.IMAGE_LINKS_HTML)
    crawler = Crawler(TestCrawler.SERVER)
    crawler.crawl()

    expected = [
        {
            "assets": ["http://127.0.0.1:5000/funny.gif"],
            "url": "http://127.0.0.1:5000/",
        },
    ]
    # Compare the decoded structure rather than the raw text so the check
    # does not depend on the serializer's indentation or whitespace.
    self.assertEqual(expected, json.loads(crawler.assets_json()))
def test_stylesheet_links(self):
    """Crawl the server seeded with STYLESHEET_LINKS_HTML; expect source.css as the root's only asset."""
    import json  # local import: the file's import block is outside this block

    self.start_server(TestCrawler.STYLESHEET_LINKS_HTML)
    crawler = Crawler(TestCrawler.SERVER)
    crawler.crawl()

    expected = [
        {
            "assets": ["http://127.0.0.1:5000/source.css"],
            "url": "http://127.0.0.1:5000/",
        },
    ]
    # Compare the decoded structure rather than the raw text so the check
    # does not depend on the serializer's indentation or whitespace.
    self.assertEqual(expected, json.loads(crawler.assets_json()))
def test_anchor_links(self):
    """Crawl the server seeded with ANCHOR_LINKS_HTML; expect entries for the root and fetch.html, both without assets."""
    import json  # local import: the file's import block is outside this block

    self.start_server(TestCrawler.ANCHOR_LINKS_HTML)
    crawler = Crawler(TestCrawler.SERVER)
    crawler.crawl()

    # Entry order is part of the expectation: lists compare positionally.
    expected = [
        {"assets": [], "url": "http://127.0.0.1:5000/"},
        {"assets": [], "url": "http://127.0.0.1:5000/fetch.html"},
    ]
    # Compare the decoded structure rather than the raw text so the check
    # does not depend on the serializer's indentation or whitespace.
    self.assertEqual(expected, json.loads(crawler.assets_json()))
def test_file_links(self):
    """Crawl the server seeded with FILE_LINKS_HTML.

    Expects the root entry to list source.pdf and source.txt as assets,
    followed by one asset-less entry for each of those two URLs.
    """
    import json  # local import: the file's import block is outside this block

    self.start_server(TestCrawler.FILE_LINKS_HTML)
    crawler = Crawler(TestCrawler.SERVER)
    crawler.crawl()

    # Entry and asset order are part of the expectation: lists compare
    # positionally.
    expected = [
        {
            "assets": [
                "http://127.0.0.1:5000/source.pdf",
                "http://127.0.0.1:5000/source.txt",
            ],
            "url": "http://127.0.0.1:5000/",
        },
        {"assets": [], "url": "http://127.0.0.1:5000/source.pdf"},
        {"assets": [], "url": "http://127.0.0.1:5000/source.txt"},
    ]
    # Compare the decoded structure rather than the raw text so the check
    # does not depend on the serializer's indentation or whitespace.
    self.assertEqual(expected, json.loads(crawler.assets_json()))