示例#1
0
def get_baiduzd_faq(question, max_retries=3):
    """Collect answers from Baidu Zhidao for questions similar to *question*.

    Fetches the first search-result page for the query, extracts the similar
    questions listed on it, and crawls each question's answers in its own
    thread.

    Args:
        question: The query handed to ``get_baiduzd_page``.
        max_retries: Maximum number of attempts to fetch the search-result
            page. The fetch used to be retried without limit, which made the
            failure branch below unreachable and could hang the caller.

    Returns:
        A list built by concatenating every non-None thread result in the
        order the threads were started, or None when the search-result page
        could not be fetched within ``max_retries`` attempts.
    """
    # HTML text of the first page of Baidu Zhidao search results.
    question_list_html = None
    for _ in range(max_retries):
        question_list_html = get_baiduzd_page(question)
        if question_list_html is not None:
            break

    if question_list_html is None:
        print("爬取百度知道答案列表页面失败!")
        return None

    # One thread per similar question so the answer pages download in parallel.
    thread_pool = []
    for sim_question in get_sim_questions(question_list_html):
        # NOTE(review): `(x)` is a parenthesised value, not a 1-tuple. If
        # MyThread unpacks `args` with `*args`, this should be
        # `(sim_question['link'],)` — confirm against MyThread.
        thread = MyThread(get_a_question_ans, args=(sim_question['link']))
        thread.start()
        thread_pool.append(thread)

    for thread in thread_pool:
        thread.join()

    # Merge the per-question results, skipping threads that produced nothing.
    answers = []
    for thread in thread_pool:
        res = thread.get_result()
        if res is not None:
            answers += res

    return answers
示例#2
0
    def get_apps_info(self, url, classification, type):
        """Crawl every listing page of one software category with worker threads.

        Reads the total page count from the pagination bar of the category's
        first page, queues one URL per page on ``SHARE_Q``, then starts
        ``_WORKER_THREAD_NUM`` threads that each run
        ``self.worker(classification, type)``.

        Args:
            url: Category base URL; page ``n`` is queued as ``url + "/" + n``.
            classification: Passed through unchanged to ``self.worker``.
            type: Passed through unchanged to ``self.worker``.

        Returns:
            None. Returns early, after printing a message, when the first
            page cannot be fetched.
        """
        main_html = self.get_html(url)
        if main_html is None:
            print(url + "爬取失败!")
            return

        # The pagination bar lists the page links; the second-to-last link
        # holds the total number of pages in this category.
        pagelist_div = main_html.find("div", {"class": "pagelist", "id": "pagelist"})
        page_links = pagelist_div.find_all("a")
        all_pages = int(page_links[len(page_links) - 2].get_text())

        # Queue one task per page (pages are numbered from 1).
        for page_no in range(1, all_pages + 1):
            SHARE_Q.put(url + "/" + str(page_no))

        threads = []
        for _ in range(_WORKER_THREAD_NUM):
            # Hand MyThread a callable. Writing self.worker(...) here would run
            # the worker synchronously in this thread and pass its return
            # value to MyThread instead of the function.
            # NOTE(review): assumes self.worker does the work itself rather
            # than returning a callable — confirm.
            thread = MyThread(lambda: self.worker(classification, type))
            thread.start()
            threads.append(thread)
        for thread in threads:
            thread.join()
        # Wait until every queued task has been marked as done.
        SHARE_Q.join()
示例#3
0
 def test_ip(self, ips):
     """Check the given proxy IPs using a pool of worker threads.

     Every entry of ``ips`` is queued on ``SHARE_Q``, then
     ``_WORKER_THREAD_NUM`` threads running ``self.worker`` are started and
     joined. Finally ``self.valid_ips`` is printed (presumably filled in by
     ``self.worker`` — confirm).

     Args:
         ips: Sequence of proxy IPs to check.
     """
     global SHARE_Q
     print(f"检查代理ip可用性...共有{len(ips)}个ip待检查")

     # Enqueue all candidates up front; real use would feed the queue
     # continuously.
     for candidate in ips:
         SHARE_Q.put(candidate)

     # Start the fixed-size pool of workers.
     workers = []
     for _ in range(_WORKER_THREAD_NUM):
         worker_thread = MyThread(self.worker)
         worker_thread.start()
         workers.append(worker_thread)

     for worker_thread in workers:
         worker_thread.join()

     # Wait until every queued task has been marked as done.
     SHARE_Q.join()
     print(f"可用代理ip:{self.valid_ips!s}")
示例#4
0
def main():
    """Call ``set_hook()``, then run a single ``MyThread`` to completion."""
    set_hook()

    worker = MyThread()
    worker.start()

    # Keep the main thread idle for five seconds before waiting on the worker.
    time.sleep(5)
    worker.join()
# ch3/example1.py
import sys

# Make the chapter directory importable. A sys.path entry has to be a
# directory (or archive), so it names the folder containing my_thread.py
# rather than the file itself. The previous value also began with a stray
# quote character, so it could never match a real path.
sys.path.append(
    "C:\\Users\\tnguy\\PycharmProjects\\MasteringConcurrency\\Chapter03"
)

from my_thread import MyThread

# Two threads with identical second arguments.
# NOTE(review): presumably (name, delay in seconds) — confirm against
# my_thread.MyThread.
thread1 = MyThread('A', 0.5)
thread2 = MyThread('B', 0.5)

# Start both before joining either so they run concurrently.
thread1.start()
thread2.start()

# Block until both threads have finished.
thread1.join()
thread2.join()

print('Finished.')