def stest_get_domain_seg(self):
    """Check that get_domain_seg reduces a URL to its scheme-and-host prefix.

    Covers a URL with a page, one with only a trailing slash, and a bare
    host; each is expected to come back without any trailing slash.
    """
    # NOTE(review): the "stest_" prefix presumably keeps the default unittest
    # loader from collecting this method - confirm that is intentional.
    crawler = spider.spider([])
    cases = (
        ("http://www.sina.com/dos.html", "http://www.sina.com"),
        ("http://www.sina.com/", "http://www.sina.com"),
        ("http://www.sina.com", "http://www.sina.com"),
    )
    for url, expected in cases:
        self.assertEqual(crawler.get_domain_seg(url), expected)
def stest_get_dir(self):
    """Check that get_dir returns the directory part of a URL, slash-terminated.

    Covers a page at the site root, a page in a sub-directory, a bare host
    and a host with a trailing slash.
    """
    # NOTE(review): the "stest_" prefix presumably keeps the default unittest
    # loader from collecting this method - confirm that is intentional.
    crawler = spider.spider([])
    cases = (
        ("http://www.sohu.com/index.html", "http://www.sohu.com/"),
        ("http://www.sohu.com/i/ndex.html", "http://www.sohu.com/i/"),
        ("http://www.sohu.com", "http://www.sohu.com/"),
        ("http://www.sohu.com/", "http://www.sohu.com/"),
    )
    for url, expected in cases:
        self.assertEqual(crawler.get_dir(url), expected)
def stest_get_base_dir(self):
    """Check get_base_dir against two HTML fixture files.

    Both fixtures are parsed with document.HTMLDocument and queried with the
    same page URL. For "search.html" the expected result is the directory of
    the page URL; for "search2.html" it is "http://www.sina.com/dos/"
    (presumably taken from a <base> tag in that fixture - verify against the
    file).

    The fixtures are opened relative to the current working directory.
    """
    # NOTE(review): the "stest_" prefix presumably keeps the default unittest
    # loader from collecting this method - confirm that is intentional.
    sp = spider.spider([])
    page_url = "http://www.spider.com/dsfdsf/html.html"
    cases = (
        ("search.html", "http://www.spider.com/dsfdsf/"),
        ("search2.html", "http://www.sina.com/dos/"),
    )
    for fixture_path, expected in cases:
        # Use a context manager so the fixture handle is closed right away
        # instead of being left for the garbage collector.
        with open(fixture_path, "r") as fixture:
            markup = fixture.read()
        doc = document.HTMLDocument(markup)
        self.assertEqual(sp.get_base_dir(doc, page_url), expected)
def stest_get_full_url(self):
    """Check how get_full_url resolves a link against a base directory.

    Covers an absolute URL (expected back unchanged), a root-relative path
    (expected to be joined to the base's host) and a plain relative file name
    (expected to be appended to the base directory).
    """
    # NOTE(review): the "stest_" prefix presumably keeps the default unittest
    # loader from collecting this method - confirm that is intentional.
    crawler = spider.spider([])
    cases = (
        ("http://www.sohu.com/dos.html", "http://localhost/sina.com/",
         "http://www.sohu.com/dos.html"),
        ("/tes.html", "http://www.sina.com/tes/",
         "http://www.sina.com/tes.html"),
        ("tess.html", "http://www.sina.com/dos/",
         "http://www.sina.com/dos/tess.html"),
    )
    for link, base_dir, expected in cases:
        self.assertEqual(crawler.get_full_url(link, base_dir), expected)
def stest_get_base_dir(self):
    """Check get_base_dir for a page from each of two HTML fixture files.

    "search.html" is expected to resolve to the directory of the page URL,
    "search2.html" to "http://www.sina.com/dos/" (presumably because that
    fixture declares its own base - verify against the file).

    Fixture paths are relative to the current working directory.
    """
    # NOTE(review): the "stest_" prefix presumably keeps the default unittest
    # loader from collecting this method - confirm that is intentional.

    def load_document(path):
        # Read the fixture inside a context manager so the file handle is
        # released deterministically rather than leaked.
        with open(path, "r") as handle:
            return document.HTMLDocument(handle.read())

    sp = spider.spider([])
    page_url = "http://www.spider.com/dsfdsf/html.html"

    doc = load_document("search.html")
    self.assertEqual(
        sp.get_base_dir(doc, page_url),
        "http://www.spider.com/dsfdsf/")

    doc = load_document("search2.html")
    self.assertEqual(
        sp.get_base_dir(doc, page_url),
        "http://www.sina.com/dos/")
def stest_get_full_url(self):
    """Check get_full_url for absolute, root-relative and relative links.

    Each link is resolved against its own base directory and compared with
    the full URL expected for it.
    """
    # NOTE(review): the "stest_" prefix presumably keeps the default unittest
    # loader from collecting this method - confirm that is intentional.
    links = ("http://www.sohu.com/dos.html", "/tes.html", "tess.html")
    bases = (
        "http://localhost/sina.com/",
        "http://www.sina.com/tes/",
        "http://www.sina.com/dos/",
    )
    wanted = (
        "http://www.sohu.com/dos.html",
        "http://www.sina.com/tes.html",
        "http://www.sina.com/dos/tess.html",
    )
    resolver = spider.spider([])
    for link, base, full_url in zip(links, bases, wanted):
        self.assertEqual(resolver.get_full_url(link, base), full_url)
def stest_get_domain_seg(self):
    """Check that get_domain_seg yields the same domain prefix for three URL forms.

    A page URL, a host with a trailing slash and a bare host are all expected
    to map to "http://www.sina.com".
    """
    # NOTE(review): the "stest_" prefix presumably keeps the default unittest
    # loader from collecting this method - confirm that is intentional.
    domain = "http://www.sina.com"
    inputs = (
        "http://www.sina.com/dos.html",
        "http://www.sina.com/",
        "http://www.sina.com",
    )
    sp = spider.spider([])
    for url in inputs:
        self.assertEqual(sp.get_domain_seg(url), domain)
def stest_get_dir(self):
    """Check that get_dir maps a URL to its slash-terminated directory.

    Three of the four inputs are expected to resolve to the site root; the
    page under "/i/" is expected to resolve to that sub-directory.
    """
    # NOTE(review): the "stest_" prefix presumably keeps the default unittest
    # loader from collecting this method - confirm that is intentional.
    site_root = "http://www.sohu.com/"
    sub_dir = "http://www.sohu.com/i/"
    expectations = [
        ("http://www.sohu.com/index.html", site_root),
        ("http://www.sohu.com/i/ndex.html", sub_dir),
        ("http://www.sohu.com", site_root),
        ("http://www.sohu.com/", site_root),
    ]
    sp = spider.spider([])
    for url, directory in expectations:
        self.assertEqual(sp.get_dir(url), directory)
def stest_fix_url(self):
    """Check fix_url on a URL containing "..", "." and "*" path pieces.

    The expected value has the dot segments collapsed and the "*" replaced.
    """
    # NOTE(review): the "stest_" prefix presumably keeps the default unittest
    # loader from collecting this method - confirm that is intentional.
    # NOTE(review): the expected text contains "2A%", whereas percent-encoding
    # "*" would normally give "%2A" - confirm against fix_url whether this
    # literal is a typo before enabling the test.
    messy_url = "http://www.sohu.com/../i/fin/./../index*.html"
    expected = "http://www.sohu.com/i/index2A%.html"
    crawler = spider.spider([])
    self.assertEqual(crawler.fix_url(messy_url), expected)
def stest_fix_url(self):
    """Check that fix_url normalises a URL with dot segments and a "*".

    A single input/expected pair is compared with assertEqual.
    """
    # NOTE(review): the "stest_" prefix presumably keeps the default unittest
    # loader from collecting this method - confirm that is intentional.
    # NOTE(review): "index2A%.html" looks like a transposed "%2A" (the usual
    # percent-encoding of "*"); verify against fix_url's real output.
    cases = (
        ("http://www.sohu.com/../i/fin/./../index*.html",
         "http://www.sohu.com/i/index2A%.html"),
    )
    sp = spider.spider([])
    for raw, fixed in cases:
        self.assertEqual(sp.fix_url(raw), fixed)