Example #1
def _failed(self, filename):
    """Re-crawl the URLs recorded in the failed-links file."""
    urls = FileUtil.readfilelist(filename)
    for i, failed_url in enumerate(urls):
        # Download and parse each previously failed URL again.
        html = self.downloader.download(failed_url)
        datas = self.parser.parse(failed_url, html)
        self.logger.info("the spider system has fetched %s failed links" %
                         str(i + 1))
        self.output.add_data(datas)
Example #2
def _failed(self, filename):
    """
    Re-fetch links that failed on the previous crawl.
    :param filename: path to the file of failed links (one "url\ttitle" per line)
    :return: None
    """
    content = FileUtil.readfilelist(filename)
    for i, item in enumerate(content):
        # Each line stores the URL and the page title separated by a tab.
        url, title = item.split('\t')
        try:
            self._download(url, title)
            self.logger.info(
                "the spider system has crawled %s failed links" % str(i + 1))
        except Exception as e:
            self.logger.info('fetching the failed contents still failed: %s' %
                             str(e))
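Both examples depend on a project-level FileUtil.readfilelist helper whose implementation is not shown here. A minimal sketch of what such a helper could look like, assuming a plain UTF-8 text file with one entry per line (an illustration only, not the project's actual code):

# Hypothetical sketch of the FileUtil helper used by the examples above.
# It only mirrors how the examples call it:
# readfilelist(filename) -> list of lines with trailing newlines removed.
import io


class FileUtil(object):

    @staticmethod
    def readfilelist(filename):
        """Read a text file and return its non-empty lines as a list."""
        with io.open(filename, 'r', encoding='utf-8') as f:
            return [line.rstrip('\n') for line in f if line.strip()]

Stripping only the trailing newline keeps the tab inside each line intact, so the "url\ttitle" split in Example #2 still works.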