# NOTE(review): this is an exact duplicate of the dealer_process defined
# immediately below; the later definition shadows this one at import time,
# so this block is dead code — candidate for removal.
def dealer_process():
    # Generator-based coroutine: forever fetch batches of domains from Storage
    # and enqueue them as crawler tasks on Q.
    app_log.info('start dealer process')
    log_fds('start')
    log_mem('start')
    s = Storage()
    q = Q()
    while True:
        log_fds('start loop')
        log_mem('start loop')
        domains = yield s.fetch_domains_for_update(options.dealer_domains_per_task)
        # Partial batch: wait briefly and retry once, hoping for a fuller batch.
        if domains and len(domains) < options.dealer_domains_per_task:
            # NOTE(review): time.sleep blocks the whole event loop from inside
            # a coroutine — presumably intentional throttling; confirm.
            time.sleep(options.dealer_fetch_task_sleep_period_sec)
            domains = yield s.fetch_domains_for_update(options.dealer_domains_per_task)
        if not domains:
            app_log.info("not found domains")
            time.sleep(options.dealer_sleep_period_sec)
            continue
        app_log.info("fetch %d domains for new task" % len(domains))
        res = q.add_crawler_task(domains)
        yield s.update_domains_after_fetch(domains)
        app_log.info("add task %s" % res)
        # Drop the reference to the batch before the next iteration.
        del domains
    app_log.info('end dealer process')  # unreachable: the loop never breaks
def dealer_process():
    """Coroutine: endlessly fetch domain batches from Storage and enqueue
    crawler tasks on Q.

    Yields on Storage calls (driven by an outer coroutine runner). Never
    returns under normal operation — the while-loop has no exit, so the
    trailing log line is unreachable.
    """
    app_log.info('start dealer process')
    log_fds('start')
    log_mem('start')
    s = Storage()
    q = Q()
    while True:
        log_fds('start loop')
        log_mem('start loop')
        domains = yield s.fetch_domains_for_update(options.dealer_domains_per_task)
        # Partial batch: wait briefly and retry once, hoping for a fuller batch.
        if domains and len(domains) < options.dealer_domains_per_task:
            # NOTE(review): time.sleep blocks the whole event loop from inside
            # a coroutine — presumably intentional throttling; confirm.
            time.sleep(options.dealer_fetch_task_sleep_period_sec)
            domains = yield s.fetch_domains_for_update(options.dealer_domains_per_task)
        if not domains:
            app_log.info("not found domains")
            time.sleep(options.dealer_sleep_period_sec)
            continue
        # Lazy %-style logging args: formatting is skipped when INFO is disabled.
        app_log.info("fetch %d domains for new task", len(domains))
        res = q.add_crawler_task(domains)
        yield s.update_domains_after_fetch(domains)
        app_log.info("add task %s", res)
        # Drop the reference to the batch before the next iteration.
        del domains
    app_log.info('end dealer process')  # unreachable: the loop never breaks
def parser_process():
    """Coroutine: forever pull parser tasks off the queue and run them.

    Polls Q for a task; when one is available, runs the Parser on the task
    payload and marks the task complete, otherwise sleeps before re-polling.
    """
    app_log_process('start parser process')
    log_fds('start')
    log_mem('start')
    q = Q()
    s = Storage()
    parser = Parser(s)
    while True:
        log_fds('start loop')
        log_mem('start loop')
        task = q.get_parser_task()
        if not task:
            # Queue is empty — idle for a while before polling again.
            app_log_process("not found task")
            time.sleep(options.parser_sleep_period_sec)
            continue
        yield parser.run(task[2])
        q.complete_task(task[0])
    app_log_process('end parser process')  # unreachable: the loop never breaks
def crawler_process():
    """Coroutine: forever pull crawler tasks off the queue and crawl them.

    Polls Q for a task; when one is available, builds a Crawler for the task
    payload, runs it, and marks the task complete, otherwise sleeps before
    re-polling.
    """
    app_log_process('start crawler process')
    log_fds('start')
    log_mem('start')
    q = Q()
    s = Storage()
    while True:
        log_fds('start loop')
        log_mem('start loop')
        task = q.get_crawler_task()
        if not task:
            # Nothing queued — back off before polling again.
            app_log_process("not found task")
            time.sleep(options.crawler_sleep_period_sec)
            continue
        crawler = Crawler(task[2], q, s)
        yield crawler.run()
        q.complete_task(task[0])
        # Unbind the crawler so it can be reclaimed before the next iteration.
        del crawler
    app_log_process('end crawler process')  # unreachable: the loop never breaks
    log_fds('end')