import queue
import random
import threading
import time

import blog_spider


def do_craw(url_queue: queue.Queue, html_queue: queue.Queue):
    # Producer: pull a URL off url_queue, crawl it, and hand the HTML
    # to html_queue for downstream parsing.
    while True:
        url = url_queue.get()
        html = blog_spider.craw(url)
        html_queue.put(html)
        print(threading.current_thread().name, f"craw {url}",
              "url_queue.size=", url_queue.qsize())
        time.sleep(random.randint(1, 2))
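A minimal sketch of how this producer could be wired into a full producer-consumer pipeline, continuing the snippet above so its imports stay in scope. The do_parse consumer here is hypothetical; it assumes blog_spider.parse returns a list of results, as hinted by the commented-out calls further down.

def do_parse(html_queue: queue.Queue):
    # Hypothetical consumer: take crawled HTML off the queue and parse it.
    while True:
        html = html_queue.get()
        results = blog_spider.parse(html)  # assumed to return a list
        print(threading.current_thread().name, "results.size=", len(results))


if __name__ == "__main__":
    url_queue = queue.Queue()
    html_queue = queue.Queue()
    for url in blog_spider.urls:
        url_queue.put(url)
    # A few producers feeding a couple of consumers. Both loop forever;
    # pass daemon=True if the main program should be able to exit.
    for idx in range(3):
        threading.Thread(target=do_craw, args=(url_queue, html_queue),
                         name=f"craw{idx}").start()
    for idx in range(2):
        threading.Thread(target=do_parse, args=(html_queue,),
                         name=f"parse{idx}").start()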
def single_thread(): print("single_thread start") for url in blog_spider.urls: print(url) html = blog_spider.craw(url) # res = blog_spider.parse(html) # print(res) print("single_thread end")
def single_thread(): print("single_thread begin:") for url in blog_spider.urls: blog_spider.craw(url) print("single_thread end:")
import gevent.monkey
gevent.monkey.patch_all()  # must run before blog_spider (and its sockets) is imported

import gevent

import blog_spider
import time

# Sequential baseline.
begin = time.time()
for url in blog_spider.urls:
    blog_spider.craw(url)
end = time.time()
print("single thread, cost = ", end - begin)

# Same work, one greenlet per URL.
begin = time.time()
tasks = [gevent.spawn(blog_spider.craw, url) for url in blog_spider.urls]
gevent.joinall(tasks)
end = time.time()
print("gevent, cost = ", end - begin)
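If the URL list grows, spawning one greenlet per URL at once can overwhelm the target site. A sketch that bounds concurrency with gevent.pool.Pool, continuing the script above (blog_spider already imported and the standard library already patched):

from gevent.pool import Pool

pool = Pool(10)  # at most 10 greenlets crawling concurrently
for url in blog_spider.urls:
    pool.spawn(blog_spider.craw, url)
pool.join()  # wait for all spawned greenlets to finish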