def __init__(self, reactor, queue_service, conf):
    """Store the injected collaborators and build the helper objects.

    Args:
        reactor: kept on ``self.reactor`` and also handed to
            ``ArticleServer``.
        queue_service: kept on ``self.queue_service``.
        conf: configuration object forwarded to ``HubExtractor`` and
            ``ArticleServer``.
    """
    # Injected dependencies are stored as-is.
    self.reactor = reactor
    self.queue_service = queue_service

    # An empty name selects the root logger.
    self.logger = logging.getLogger("")

    # Helpers built from the configuration (extractor first, as before).
    self.hub_extractor = HubExtractor(conf)
    # FIXME delete article server
    self.article_server = ArticleServer(reactor, conf)
class HubServer(object):
    """Poll a task queue for hub-page URLs, download them and fan out links.

    For each task taken from ``queue_service`` the page at ``task.url`` is
    downloaded through ``reactor.download_and_process``.  The downloaded body
    is given to ``HubExtractor.extract`` and every URL it returns is passed
    to ``ArticleServer.process_task``.
    """

    def __init__(self, reactor, queue_service, conf):
        """Store the collaborators and build the extractor/article helpers.

        Args:
            reactor: object exposing ``download_and_process``; also handed
                to ``ArticleServer``.
            queue_service: object exposing ``get``, polled by ``start``.
            conf: configuration forwarded to ``HubExtractor`` and
                ``ArticleServer``.
        """
        # An empty name selects the root logger.
        self.logger = logging.getLogger("")
        self.reactor = reactor
        self.queue_service = queue_service
        self.hub_extractor = HubExtractor(conf)
        # FIXME delete article server
        self.article_server = ArticleServer(reactor, conf)

    def process_request(self, response, url):
        """Log the status code, phrase and raw headers of a response.

        Args:
            response: object with ``code``, ``phrase`` and
                ``headers.getAllRawHeaders()``.
            url: URL the response belongs to.

        Returns:
            None (the result of the logging call, as before).
        """
        headers = pformat(list(response.headers.getAllRawHeaders()))
        return self.logger.info(
            "http response, url:%s, code:%s, phrase:%s, headers:%s",
            url, response.code, response.phrase, headers)

    def process_body(self, body, url):
        """Extract links from a downloaded page and dispatch each of them.

        Args:
            body: page content; only its first 100 items are logged.
            url: URL the body was downloaded from.
        """
        self.logger.info("page body, url:%s, body:%s", url, body[:100])
        not_exist = self.hub_extractor.extract(body, url)
        if not not_exist:
            # Keep the historical debug output for an empty result, but stop
            # here so a ``None`` result is never iterated.
            print(not_exist)
            return
        # A separate loop name avoids clobbering the ``url`` parameter.
        for link in not_exist:
            self.article_server.process_task(link)

    def process_error(self, failure, url):
        """Print and log the message of a failed download.

        Args:
            failure: object exposing ``getErrorMessage()`` (presumably a
                Twisted ``Failure`` - confirm against the reactor).
            url: URL whose download failed.
        """
        message = failure.getErrorMessage()
        print(message)
        self.logger.error("download error, url:%s, msg:%s", url, message)

    def process_task(self, url):
        """Schedule download and processing of one hub page.

        Args:
            url: page URL.  Text is encoded to UTF-8; a value that is
                already a byte string is passed through unchanged, so
                re-encoding cannot fail on non-ASCII bytes.
        """
        if not isinstance(url, bytes):
            url = url.encode('utf-8')
        # Each callback spec is a (callable, extra positional args, kwargs)
        # triple, in the shape the original code handed to the reactor.
        requestProcess = (self.process_request, (url,), {})
        bodyProcess = (self.process_body, (url,), {})
        errorProcess = (self.process_error, (url,), {})
        # NOTE(review): meaning of the ``None`` second argument is not
        # visible here - confirm against ``download_and_process``.
        self.reactor.download_and_process(url, None, requestProcess,
                                          bodyProcess, errorProcess,
                                          redirect=True)

    def start(self):
        """Run the polling loop until interrupted.

        Each iteration calls ``queue_service.get(10)`` (the ``10`` is
        presumably a timeout - confirm) and processes the returned task, if
        any.  ``KeyboardInterrupt`` ends the process with exit status 0; any
        other exception is printed, logged with its traceback, and the loop
        carries on.
        """
        while True:
            try:
                task, _msg = self.queue_service.get(10)
                if task:
                    self.process_task(task.url)
                else:
                    print('queue empty')
            except KeyboardInterrupt:
                sys.exit(0)
            except Exception as e:
                # Best-effort loop: one bad task must not stop the server,
                # but the traceback should not be lost either.
                print(e)
                self.logger.exception("task processing error: %s", e)