def download_batch(self):
    """Download every cached MP3 URL through a thread pool.

    When ``mp3_url.txt`` does not exist under ``self.base_path`` the work
    is delegated to ``self.parse_all()``. Otherwise each line of the file
    is appended to ``self.mp3_url_list`` and one
    ``self.download(url, index)`` task is submitted per entry, where
    ``index`` is the entry's position in ``self.mp3_url_list``.
    """
    # NOTE(review): the Windows-style separator is kept as-is; presumably
    # the code that writes this file builds the path the same way.
    url_file = self.base_path + "\\" + "mp3_url.txt"

    if not os.path.exists(url_file):
        self.parse_all()
        return

    with open(url_file, mode="r") as f:
        self.mp3_url_list.extend(line.strip('\n') for line in f)

    # The argument is presumably the worker count -- confirm against ThreadPool.
    pool = ThreadPool(50)
    # enumerate() gives each entry its own position. The previous
    # list.index() lookup returned the first match, so duplicate URLs all
    # received the same index, and it rescanned the list for every entry.
    for index, url in enumerate(self.mp3_url_list):
        pool.run(func=self.download, args=(url, index))
    pool.close()
def parse_all(self):
    """Submit every entry of ``self.chapter_list`` to a thread pool.

    Each chapter is dispatched as ``self.parse(chapter, position)`` with
    ``self.callback`` registered as the task callback. When the chapter
    list is empty only a notice is printed and nothing is scheduled.
    """
    if len(self.chapter_list) == 0:
        print("下载器未数初始化")
        return

    workers = ThreadPool(100)
    print("开始解析并下载有声书:" + self.book_name)
    for position, chapter in enumerate(self.chapter_list):
        workers.run(
            func=self.parse,
            args=(chapter, position),
            callback=self.callback,
        )
    workers.close()
# NOTE(review): `f` and `dir` are bound by code outside this excerpt
# (presumably an opened banner file and the working directory) -- confirm
# against the surrounding script before relying on them.
for i in f:
    print(i.strip("\n"))

print("~" * 50 + "该爬虫仅用于交流学习,如果需要大量爬取请自行加入ip池" + "~" * 50)
print("~" * 31 + "如果想要爬取多个人的信息,只需在目录中的id.txt填入id列表(一行一个)然后选择第三个模式即可开始爬取" + "~" * 31)

# Mode selection: 1 = one user's info, 2 = collect fan ids, 3 = info for
# every id listed in id.txt.
a = input("请输入1或2或3选择要进行步骤:\n1.爬取输入用户信息\n2.爬取粉丝id\n3.爬取粉丝(多人)信息\n")

if a == "1":
    user_id = input("请输入用户的id:")
    spider = Spider(int(user_id), 0)
    spider.get_info(int(user_id))
elif a == "2":
    user_id = input("请输入用户的id:")
    fans_num = input("请输入想要获取的粉丝信息的数量:")
    spider = Spider(int(user_id), int(fans_num))
    spider.get_fans_id()
elif a == "3":
    spider = Spider(0, 0)
    try:
        # Only the file access is guarded: the message below is about a
        # missing id.txt, so unrelated failures must not be reported as such.
        with open(dir + "\\id.txt", encoding="utf-8", mode="r") as id_file:
            # Blank lines are skipped so no empty id reaches get_info().
            id_list = [line.strip() for line in id_file if line.strip()]
    except OSError:
        print("未找到id.txt文件,请先提供id列表,或选择2模式获取id列表")
    else:
        pool = ThreadPool(20)
        for fan_id in id_list:
            pool.run(func=spider.get_info, args=(fan_id,))
        pool.close()
else:
    print("请输入1或2或3")