Example #1
0
File: spider.py  Project: yongbo/spider
    # Parse command-line options; bail out early when parsing failed.
    options = parseoption()
    if not options:
        exit()

    # Normalize the target URL (e.g. ensure a scheme/trailing form).
    options.url = sanitizeUrl(options.url)

    # Configure logging according to the command-line options.
    logConfig(options)
    logger = logging.getLogger()

    # Start the crawler (runs in its own thread(s); isAlive() below
    # reports whether it is still working).
    crawler = Crawler(options)
    crawler.start()

    # Single status format, reused for every periodic report.
    # NOTE: fixes typos in the original output ("thead" -> "thread",
    # "unfeched" -> "unfetched").
    status_fmt = ("INFO:thread number of threadpool:%s,"
                  "fetched url:%s,unfetched url:%s,depth:%s")
    print(status_fmt % crawler.getInfo())

    # Report status roughly every 10 seconds while the crawler runs,
    # polling once per second so we notice completion promptly.
    while crawler.isAlive():
        for _ in xrange(10):
            if not crawler.isAlive():
                break
            time.sleep(1)
        if crawler.isAlive():
            print(status_fmt % crawler.getInfo())

    # Final summary after the crawler has finished.
    print(status_fmt % crawler.getInfo())
    print("over")