Пример #1
0
 def fire(self, entry_map, interval):
     """Start one crawl round over every entry in ``entry_map``.

     Stores ``interval`` on the instance, switches the state to
     ``STATE_PROCESSING``, allocates a fresh id that is published under the
     key "SpiderID", writes a "Start" statistic event to the log, and then
     launches one network job per entry.

     ``entry_map`` is iterated as ``(url, processor_class)`` pairs. Each
     ``processor_class`` is instantiated with the job built for its URL, and
     its ``handle_response`` method is handed to the job as the callback.
     NOTE(review): the meaning/units of ``interval`` are not visible in this
     method -- confirm against the code that reads it.

     Always returns True.
     """
     self.__interval = interval
     self.__state = STATE_PROCESSING

     # Allocate the id for this run and record the start of the run.
     import id_mgr
     spider_id = id_mgr.get_new_id()
     id_mgr.set_id("SpiderID", spider_id)
     start_event = logginghelper.StatisticEventLog(
         str(spider_id), "Spider", str(time.time()), "Start")
     logginghelper.info(start_event)

     # One request/job/processor triple per entry point.
     import req_gen
     import net_task
     for entry in entry_map:
         url, processor_class = entry
         task = net_task.make_job(req_gen.make_request(url), None, None)
         processor = processor_class(task)
         task.execute(processor.handle_response)
     return True
    global _engine
    if _engine:
        return False
    _engine = HTTPClientEngine(site_conn_limit,default_limit)
    return True

def get_engine():
    """Return the shared engine object held in the module global ``_engine``.

    This accessor never creates an engine; it only hands back the current
    value of ``_engine``, whatever that is at the time of the call.
    """
    engine = _engine
    return engine

########################################################################################
# The code below is for testing only.
def test_on_response(response):
    """Debug callback used by the manual smoke test.

    Prints the number of pending requests the engine reports for
    www.baidu.com and www.qq.com, followed by the ``Connection`` header of
    ``response``.

    ``response`` must expose a dict-like ``headers`` attribute. A response
    that carries no ``Connection`` header is reported as ``None`` instead of
    raising ``KeyError`` from inside the callback.
    """
    engine = get_engine()
    # A single pre-formatted argument keeps the output byte-identical to the
    # old print statement while also being valid on Python 3.
    print("current request count in pending:  www.baidu.com: %s www.qq.com: %s" % (
        engine.get_pending_count("www.baidu.com"),
        engine.get_pending_count("www.qq.com"),
    ))
    # Servers may omit the Connection header entirely, so use .get().
    print("Connection: %s" % (response.headers.get("Connection"),))

if __name__ == "__main__":
    # Manual smoke test: queue 30 keep-alive requests against each of two
    # hosts, printing the per-host pending counts after every round, then
    # run the Tornado IO loop so the queued requests are processed.
    initialize()
    this_engine = get_engine()
    import req_gen

    test_urls = ("http://www.baidu.com/", "http://www.qq.com/")
    for _ in range(30):
        for url in test_urls:
            req = req_gen.make_request(url, no_keep_alive=False)
            this_engine.fetch(req, test_on_response)
        # Single pre-formatted argument: same output as the old print
        # statement on Python 2, and valid syntax on Python 3.
        print("www.baidu.com: %s www.qq.com: %s" % (
            this_engine.get_pending_count("www.baidu.com"),
            this_engine.get_pending_count("www.qq.com"),
        ))
    import tornado.ioloop
    tornado.ioloop.IOLoop.instance().start()