示例#1
0
 def stest_get_domain_seg(self):
     """Expect get_domain_seg to yield "http://www.sina.com" for each sample URL.

     The samples cover a URL with a file path, one with only a trailing
     slash, and one that is already the bare scheme-and-host string.
     """
     crawler = spider.spider([])
     expected = "http://www.sina.com"
     samples = (
         "http://www.sina.com/dos.html",
         "http://www.sina.com/",
         "http://www.sina.com",
     )
     for url in samples:
         self.assertEqual(crawler.get_domain_seg(url), expected)
示例#2
0
 def stest_get_dir(self):
     """Expect get_dir to map each sample URL to its directory URL.

     Every expected value ends with "/", including the cases where the
     input is just the host with or without a trailing slash.
     """
     crawler = spider.spider([])
     # (input URL, expected directory URL), checked in this order.
     cases = (
         ("http://www.sohu.com/index.html", "http://www.sohu.com/"),
         ("http://www.sohu.com/i/ndex.html", "http://www.sohu.com/i/"),
         ("http://www.sohu.com", "http://www.sohu.com/"),
         ("http://www.sohu.com/", "http://www.sohu.com/"),
     )
     for url, expected in cases:
         self.assertEqual(crawler.get_dir(url), expected)
示例#3
0
文件: spider.py 项目: lennon310/hyer
	def stest_get_base_dir(self):
		"""Check get_base_dir against two HTML fixtures read from the working directory.

		Both fixtures are evaluated with the same page URL; the expected base
		directory differs per fixture.
		NOTE(review): presumably search2.html declares its own base URL
		(http://www.sina.com/dos/) while search.html does not -- confirm
		against the fixture files.
		"""
		sp=spider.spider([])
		page_url="http://www.spider.com/dsfdsf/html.html"
		# (fixture path, expected base directory), checked in this order.
		cases=(
			("search.html","http://www.spider.com/dsfdsf/"),
			("search2.html","http://www.sina.com/dos/"),
		)
		for path,expected in cases:
			# Close the fixture deterministically instead of leaking the
			# handle until garbage collection.
			with open(path,"r") as fh:
				s=fh.read()
			doc=document.HTMLDocument(s)
			self.assertEqual(sp.get_base_dir(doc,page_url),expected)
示例#4
0
 def stest_get_full_url(self):
     """Expect get_full_url to produce the listed absolute URLs.

     The three cases pass an already-absolute URL, a path starting with
     "/", and a bare file name as the first argument.
     NOTE(review): the second argument is presumably the base/directory
     URL the link is resolved against -- confirm in spider.get_full_url.
     """
     crawler = spider.spider([])
     # (first argument, second argument, expected result), checked in order.
     cases = (
         ("http://www.sohu.com/dos.html", "http://localhost/sina.com/",
          "http://www.sohu.com/dos.html"),
         ("/tes.html", "http://www.sina.com/tes/",
          "http://www.sina.com/tes.html"),
         ("tess.html", "http://www.sina.com/dos/",
          "http://www.sina.com/dos/tess.html"),
     )
     for link, base, expected in cases:
         self.assertEqual(crawler.get_full_url(link, base), expected)
示例#5
0
    def stest_get_base_dir(self):
        """Check get_base_dir against two HTML fixtures read from the working directory.

        Both fixtures are evaluated with the same page URL; the expected base
        directory differs per fixture.
        NOTE(review): presumably search2.html declares its own base URL
        (http://www.sina.com/dos/) while search.html does not -- confirm
        against the fixture files.
        """
        sp = spider.spider([])
        page_url = "http://www.spider.com/dsfdsf/html.html"
        # (fixture path, expected base directory), checked in this order.
        cases = (
            ("search.html", "http://www.spider.com/dsfdsf/"),
            ("search2.html", "http://www.sina.com/dos/"),
        )
        for path, expected in cases:
            # Close the fixture deterministically instead of leaking the
            # handle until garbage collection.
            with open(path, "r") as fh:
                s = fh.read()
            doc = document.HTMLDocument(s)
            self.assertEqual(sp.get_base_dir(doc, page_url), expected)
示例#6
0
文件: spider.py 项目: lennon310/hyer
	def stest_get_full_url(self):
		"""Expect get_full_url to produce the listed absolute URLs.

		Covers an already-absolute URL, a path starting with "/", and a bare
		file name as the first argument.
		"""
		crawler=spider.spider([])

		# First argument is already absolute: expected back unchanged.
		absolute=crawler.get_full_url("http://www.sohu.com/dos.html","http://localhost/sina.com/")
		self.assertEqual(absolute,"http://www.sohu.com/dos.html")

		# First argument starts with "/": expected directly under the host.
		rooted=crawler.get_full_url("/tes.html","http://www.sina.com/tes/")
		self.assertEqual(rooted,"http://www.sina.com/tes.html")

		# Bare file name: expected appended to the second argument.
		relative=crawler.get_full_url("tess.html","http://www.sina.com/dos/")
		self.assertEqual(relative,"http://www.sina.com/dos/tess.html")
示例#7
0
文件: spider.py 项目: lennon310/hyer
	def stest_get_domain_seg(self):
		"""Expect get_domain_seg to yield "http://www.sina.com" for each sample URL.

		Samples: a URL with a file path, one with only a trailing slash, and
		the bare scheme-and-host string.
		"""
		crawler=spider.spider([])
		expected="http://www.sina.com"
		for url in ("http://www.sina.com/dos.html","http://www.sina.com/","http://www.sina.com"):
			self.assertEqual(crawler.get_domain_seg(url),expected)
示例#8
0
文件: spider.py 项目: lennon310/hyer
	def stest_get_dir(self):
		"""Expect get_dir to map each sample URL to its directory URL.

		Every expected value ends with "/", including the host-only inputs.
		"""
		crawler=spider.spider([])
		# (input URL, expected directory URL), checked in this order.
		cases=[
			("http://www.sohu.com/index.html","http://www.sohu.com/"),
			("http://www.sohu.com/i/ndex.html","http://www.sohu.com/i/"),
			("http://www.sohu.com","http://www.sohu.com/"),
			("http://www.sohu.com/","http://www.sohu.com/"),
		]
		for url,expected in cases:
			self.assertEqual(crawler.get_dir(url),expected)
示例#9
0
文件: spider.py 项目: lennon310/hyer
	def stest_fix_url(self):
		"""Expect fix_url to collapse "." / ".." path segments and rewrite "*".

		NOTE(review): the expected text contains "2A%" where percent-encoding
		of "*" would normally read "%2A" -- confirm whether this mirrors
		fix_url's real output or is a typo in the expectation.
		"""
		crawler=spider.spider([])
		raw_url="http://www.sohu.com/../i/fin/./../index*.html"
		expected="http://www.sohu.com/i/index2A%.html"
		self.assertEqual(crawler.fix_url(raw_url),expected)
示例#10
0
 def stest_fix_url(self):
     """Expect fix_url to collapse "." / ".." path segments and rewrite "*".

     NOTE(review): the expected text contains "2A%" where percent-encoding
     of "*" would normally read "%2A" -- confirm whether this mirrors
     fix_url's real output or is a typo in the expectation.
     """
     crawler = spider.spider([])
     raw_url = "http://www.sohu.com/../i/fin/./../index*.html"
     expected = "http://www.sohu.com/i/index2A%.html"
     actual = crawler.fix_url(raw_url)
     self.assertEqual(actual, expected)