def parse_index(self, page, url):
    """Yield a ``(self.parse_page, absolute_url)`` pair per unfetched link.

    ``page`` is passed to ``txt_wrap_by_all`` together with the Chinese
    marker literal (it reads "translated") and ``'<span'``; each fragment
    that call returns is then searched with ``txt_wrap_by`` for an
    ``href="..."`` value (presumably the text between the two delimiters --
    confirm against the helper's definition).

    A fragment is skipped when no href is found or when ``url_is_fetched``
    reports the href as already fetched. Otherwise the href is appended to
    ``http://dongxi.net/`` and yielded alongside ``self.parse_page``.

    The ``url`` argument is accepted but never read.
    """
    for fragment in txt_wrap_by_all('已翻译', '<span', page):
        href = txt_wrap_by('href="', '"', fragment)
        # Skip fragments without a link, and links that were already fetched.
        if not href or url_is_fetched(href):
            continue
        yield self.parse_page, 'http://dongxi.net/%s' % href
def parse_index(self, page, url):
    """Walk the links of an index page: schedule downloads or parse saved ones.

    ``page`` is passed to ``txt_wrap_by_all`` with the delimiters
    ``'<h5 clas'`` and ``'</h5'``; every fragment it returns is searched with
    ``txt_wrap_by`` for an ``href="..."`` value.

    For each link found:

    * if ``url_is_fetched(link)`` is false, ``(self.save_page, link)`` is
      yielded;
    * otherwise ``self.parse_page`` is called with the name produced by
      ``self.name_builder(link)``.

    Fragments with no href are skipped instead of being handed to
    ``self.name_builder`` / ``url_is_fetched`` as an empty value.

    The ``url`` argument is accepted but never read.
    """
    # Debug marker kept from the original; the parenthesised form prints the
    # same text under Python 2 and is also valid Python 3.
    print("!")
    for heading in txt_wrap_by_all('<h5 clas', '</h5', page):
        link = txt_wrap_by('href="', '"', heading)
        if not link:
            # No usable href in this fragment -- nothing to save or parse.
            continue
        # Built before the fetched-check, matching the original call order.
        filename = self.name_builder(link)
        if not url_is_fetched(link):
            yield self.save_page, link
        else:
            # NOTE(review): the return value is discarded; if parse_page is
            # itself a generator this call does no work -- confirm intent.
            self.parse_page(filename)