Exemplo n.º 1
0
 def run(self):
     """Process fresh content URLs until ``global_EXIT`` becomes truthy.

     Each item taken from ``cache.freshContentUrl_queue`` is unpacked as
     ``(website_id, url)`` and passed to ``filterContentInfoFunc``.  When
     the result is ``SpiderResType.success`` or
     ``SpiderResType.alreadyExist`` the URL is appended to
     ``cache.oldContent_list``; for any other result the URL is stored in
     ``cache.unrecognized_contentUrl_dict`` with ``website_id`` as value.

     An empty queue is not an error and is skipped silently; any other
     exception is logged together with the URL being handled.
     """
     while not global_EXIT:
         # Reset each iteration so the error log never reports a stale URL
         # when the failure happens before a new item is unpacked.
         url = ""
         try:
             website_id, url = Cache.getQueue(cache.freshContentUrl_queue, False)
             res = filterContentInfoFunc(website_id, url)
             if res == SpiderResType.success or res == SpiderResType.alreadyExist:
                 Cache.appendList(cache.oldContent_list, url)
             else:
                 Cache.setDict(cache.unrecognized_contentUrl_dict, url, website_id)
         except queue.Empty:
             # Nothing to do right now; loop around and re-check the exit flag.
             # NOTE(review): if Cache.getQueue(..., False) is a non-blocking
             # get, this loop polls without pausing -- confirm whether a
             # short back-off is wanted here.
             continue
         except Exception:
             log.logMsg(LogType.error, "[FilterContentInfoThread] %s %s" % (url, traceback.format_exc()))
Exemplo n.º 2
0
 def run(self):
     """Process website URLs until ``global_EXIT`` becomes truthy.

     Each item taken from ``cache.websiteUrl_queue`` is unpacked as
     ``(website_id, website_url, xpath)`` and passed to
     ``filterContentUrlFunc``.  When that call returns a falsy value, the
     entry is stored in ``cache.unrecognized_websiteUrl_dict`` under
     ``website_id`` with ``(website_url, xpath)`` as value.

     When the queue is empty the thread idles for up to ten seconds before
     polling again; any other exception is logged together with the URL
     being handled.
     """
     while not global_EXIT:
         # Reset each iteration so the error log never reports a stale URL
         # when the failure happens before a new item is unpacked.
         website_url = ""
         try:
             website_id, website_url, xpath = Cache.getQueue(cache.websiteUrl_queue, False)
             if not filterContentUrlFunc(website_id, website_url, xpath):
                 Cache.setDict(cache.unrecognized_websiteUrl_dict, website_id, (website_url, xpath))
         except queue.Empty:
             # Queue is idle: wait up to ten seconds, but in one-second steps
             # so that a shutdown request is noticed promptly.
             for _ in range(10):
                 if global_EXIT:
                     break
                 time.sleep(1)
         except Exception:
             log.logMsg(LogType.error, "[FilterContentUrlThread.freshHandler] %s %s"%(website_url, traceback.format_exc()))
Exemplo n.º 3
0
 def run(self):
     """Write queued log messages to the log file until asked to stop.

     The loop runs while the ``"LogThread_EXIT"`` entry of
     ``cache.globalArgs_dict`` is falsy.  Each message taken from
     ``cache.log_queue`` gets a ``<HH:MM:SS>`` timestamp and a newline
     appended and is then written, in append mode, to the file named by
     ``self.getFilename()``.

     Before writing, if the current file already exists and is larger than
     1 MiB, ``self.index`` is incremented.

     An empty queue is skipped silently; any other exception is printed
     to standard output and the loop continues.
     """
     while not Cache.getDict(cache.globalArgs_dict, "LogThread_EXIT"):
         try:
             info = Cache.getQueue(cache.log_queue, False)
             if os.path.exists(self.getFilename()):
                 # Size in MiB: start a new log file once the current one
                 # has grown beyond 1 MiB.
                 log_size = os.path.getsize(self.getFilename()) / 1024 / 1024
                 if log_size > 1:
                     # presumably getFilename() derives the name from
                     # self.index, so this switches files -- TODO confirm
                     self.index += 1
             # getFilename() is called again on purpose: the index may have
             # just changed above.
             with open(self.getFilename(), 'a') as f:
                 stamp = '<%s>\n' % datetime.datetime.now().strftime("%H:%M:%S")
                 info += stamp
                 f.write(info)
         except queue.Empty:
             # No pending messages; loop around and re-check the exit flag.
             continue
         except Exception as e:
             print("Log Error: %s" % e)