import logging
import sys
import time

options = parseoption()
if not options:
    sys.exit()

# Normalize the URL
options.url = sanitizeUrl(options.url)

# Configure logging from the command-line options
logConfig(options)
logger = logging.getLogger()

# Start the crawler
crawler = Crawler(options)
crawler.start()

STATUS = "INFO: thread number of threadpool:%s, fetched url:%s, unfetched url:%s, depth:%s"
print STATUS % crawler.getInfo()

# Print the status every 10 seconds while the crawler is running,
# checking aliveness once per second so we exit promptly
while crawler.isAlive():
    for x in xrange(10):
        if crawler.isAlive():
            time.sleep(1)
        else:
            break
    if crawler.isAlive():
        print STATUS % crawler.getInfo()

# Final status once the crawler has finished
print STATUS % crawler.getInfo()
print "over"
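For reference, the main block above assumes several helpers that are defined elsewhere: parseoption, sanitizeUrl, logConfig, and a Crawler class exposing start, isAlive, and getInfo. The stubs below are a minimal illustrative sketch of that assumed interface so the block can run standalone; the names, option fields, and return shapes are inferred from the calls above, not taken from the actual implementation.

# Illustrative stubs only -- the real definitions live elsewhere in the crawler.
# Every detail here is an assumption inferred from how the main block calls them.
import logging
import optparse
import threading
import urlparse

def parseoption():
    # Assumed: parses -u/--url and -d/--depth from the command line,
    # returning None when no URL is given.
    parser = optparse.OptionParser()
    parser.add_option("-u", "--url", dest="url")
    parser.add_option("-d", "--depth", dest="depth", type="int", default=2)
    options, args = parser.parse_args()
    return options if options.url else None

def sanitizeUrl(url):
    # Assumed: ensure the URL carries a scheme so it can be fetched.
    if not urlparse.urlparse(url).scheme:
        url = "http://" + url
    return url

def logConfig(options):
    # Assumed: basic logging setup driven by the parsed options.
    logging.basicConfig(level=logging.INFO)

class Crawler(threading.Thread):
    # Assumed interface: start()/isAlive() inherited from threading.Thread,
    # plus getInfo() returning the 4-tuple the status line expects:
    # (thread count, fetched URLs, unfetched URLs, depth).
    def __init__(self, options):
        threading.Thread.__init__(self)
        self.options = options
        self.fetched = 0
        self.unfetched = 1  # the seed URL

    def run(self):
        pass  # real crawling logic lives elsewhere

    def getInfo(self):
        return (1, self.fetched, self.unfetched, self.options.depth)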