import queue, random, threading, time

import blog_spider


def do_craw(url_queue: queue.Queue, html_queue: queue.Queue):
    # Producer worker: download each URL and pass the HTML downstream.
    while True:
        url = url_queue.get()
        html = blog_spider.craw(url)
        html_queue.put(html)
        print(threading.current_thread().name, f"craw {url}", "url_queue.size=", url_queue.qsize())
        time.sleep(random.randint(1, 2))
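This worker is meant to run on several threads that share the two queues. A minimal wiring sketch, assuming blog_spider.urls is the URL list used in the other examples (the thread count of 3 and the thread names are arbitrary choices):

import queue
import threading

import blog_spider

url_queue = queue.Queue()
html_queue = queue.Queue()
for url in blog_spider.urls:
    url_queue.put(url)

# do_craw loops forever, so daemon threads let the process
# exit once the main thread finishes.
for i in range(3):
    threading.Thread(target=do_craw, args=(url_queue, html_queue),
                     name=f"craw{i}", daemon=True).start()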
Example #2
import blog_spider


def single_thread():
    print("single_thread start")
    for url in blog_spider.urls:
        print(url)
        html = blog_spider.craw(url)
        # res = blog_spider.parse(html)
        # print(res)
    print("single_thread end")
Example #3
import blog_spider


def single_thread():
    print("single_thread begin:")
    for url in blog_spider.urls:
        blog_spider.craw(url)
    print("single_thread end:")
Example #4
import gevent.monkey

# patch_all() must run before any network-using module is imported, so
# that the blocking sockets inside blog_spider.craw become cooperative.
gevent.monkey.patch_all()

import gevent
import blog_spider
import time

# Baseline: fetch every URL sequentially on the main thread.
begin = time.time()
for url in blog_spider.urls:
    blog_spider.craw(url)
end = time.time()
print("single thread, cost = ", end - begin)

# Concurrent version: spawn one greenlet per URL and wait for all of them.
begin = time.time()
tasks = [gevent.spawn(blog_spider.craw, url) for url in blog_spider.urls]
gevent.joinall(tasks)
end = time.time()
print("gevent, cost = ", end - begin)