Exemplo n.º 1
0
 def parse_index(self, page, url):
     """Yield a ``(self.parse_page, absolute_url)`` pair per unfetched link.

     ``page`` is split with ``txt_wrap_by_all`` into the chunks found
     between the opening marker string and the following ``<span``; from
     each chunk the first ``href="..."`` value is pulled out with
     ``txt_wrap_by`` and appended to the ``http://dongxi.net/`` prefix.
     NOTE(review): both helpers and ``url_is_fetched`` are defined
     elsewhere; what they return is inferred from how they are used
     here -- confirm.

     ``url`` is accepted to keep the method signature but is never read.
     """
     # The opening marker is Chinese for "translated".
     for link_wrap in txt_wrap_by_all('已翻译', '<span', page):
         href = txt_wrap_by('href="', '"', link_wrap)
         # Skip chunks without an href and links reported as already fetched.
         if href and not url_is_fetched(href):
             yield self.parse_page, 'http://dongxi.net/%s' % href
Exemplo n.º 2
0
 def parse_index(self, page, url):
     """Dispatch every link found in the ``<h5 ...>`` sections of ``page``.

     For each section returned by ``txt_wrap_by_all`` the first
     ``href="..."`` value is extracted. A link that ``url_is_fetched``
     reports as not fetched is yielded as ``(self.save_page, link)``;
     otherwise ``self.parse_page`` is called with the name that
     ``self.name_builder`` produced for the link.

     ``url`` is accepted to keep the method signature but is never read.
     """
     # Debug marker kept from the original; the parenthesised form prints
     # the same text on Python 2 and also parses on Python 3.
     print("!")
     for link_wrapper in txt_wrap_by_all('<h5 clas', '</h5', page):
         link = txt_wrap_by('href="', '"', link_wrapper)
         if not link:
             # Nothing to save or parse for a section without an href.
             # NOTE(review): assumes txt_wrap_by returns a falsy value
             # when the markers are not found -- confirm.
             continue
         filename = self.name_builder(link)
         if not url_is_fetched(link):
             yield self.save_page, link
         else:
             # NOTE(review): the return value is discarded; if parse_page
             # is itself a generator this call alone runs none of its
             # body -- confirm the intent.
             self.parse_page(filename)