def main(self):
    """Crawl cover images page by page and hand each image to a Download thread.

    Runs until a page fetch fails or a page contains no items.  Two threading
    strategies are supported (see `thread_type` below).
    """
    page_count = 1
    # NOTE(review): `image_count` is never updated or read below — appears unused.
    image_count = 0
    # Thread count before we spawn any workers; used as the "idle" baseline.
    main_thread_count = threading.activeCount()
    # Multi-thread download mode:
    #   1 - keep up to self.thread_count download threads running at all times
    #   2 - spawn threads for all images of one page, wait for them all to
    #       finish, then move to the next page
    thread_type = 2
    while True:
        # Fetch one listing page.
        page_data = get_one_page_data(page_count)
        if page_data is None:
            log.error("第%s页获取失败" % page_count)
            break
        # All (thumbnail url, title) pairs on the page.
        image_info_list = re.findall('<img src="" data-original="([^"]*)" class="lazy img" title="([^"]*)">', page_data)
        # Number of items (films) on the page.
        page_data_count = page_data.count('<div class="item pull-left">')
        # Zero items means we've walked past the last page.
        if page_data_count == 0:
            break
        # NOTE(review): the "影片数量" slot is filled with len(image_info_list) and the
        # "封面图片数量" slot with page_data_count — these look swapped; confirm intent.
        log.step("第%s页,影片数量%s,获取到的封面图片数量%s" % (page_count, len(image_info_list), page_data_count))
        for small_image_url, title in image_info_list:
            # Mode 1: thread limit reached, wait for a slot to free up.
            while thread_type == 1 and threading.activeCount() >= self.thread_count + main_thread_count:
                time.sleep(5)
            title = robot.filter_text(str(title)).upper()
            image_url = get_large_image_url(small_image_url)
            if image_url is None:
                log.trace("%s的封面图片大图地址获取失败" % title)
                continue
            log.step("开始下载%s的封面图片 %s" % (title, image_url))
            file_type = image_url.split(".")[-1]
            file_path = os.path.join(self.image_download_path, "%s.%s" % (title, file_type))
            file_temp_path = os.path.join(self.image_download_path, "%s_temp.%s" % (title, file_type))
            # Spawn the download worker for this image.
            thread = Download(self.thread_lock, title, file_path, file_temp_path, image_url)
            thread.start()
            time.sleep(0.1)
        # Mode 2: block until every worker spawned for this page has finished.
        while thread_type == 2 and threading.activeCount() > main_thread_count:
            time.sleep(5)
        page_count += 1
    log.step("全部下载完毕,耗时%s秒,共计图片%s张" % (self.get_run_time(), TOTAL_IMAGE_COUNT))
def main(self):
    """Crawl meituzz.com albums sequentially by album id, downloading images
    and (when present) the album video, persisting the last album id to the
    save file so the next run resumes where this one stopped.
    """
    # Parse the save file to recover the album id reached by the previous run.
    album_id = 1
    if os.path.exists(self.save_data_path):
        save_file = open(self.save_data_path, "r")
        save_info = save_file.read()
        save_file.close()
        album_id = int(save_info.strip())
    total_image_count = 0
    total_video_count = 0
    # Consecutive "album deleted" pages seen; used to detect the end of the site.
    error_count = 0
    is_over = False
    while not is_over:
        album_url = "http://meituzz.com/album/browse?albumID=%s" % album_id
        try:
            album_page_return_code, album_page = tool.http_request(album_url)[:2]
        except SystemExit:
            log.step("提前退出")
            break
        if album_page_return_code == -500:
            # Server-side error for this album only; skip it.
            log.error("第%s页相册内部错误" % album_id)
            album_id += 1
            continue
        elif album_page_return_code != 1:
            log.error("第%s页图片获取失败" % album_id)
            break
        if album_page.find("<title>相册已被删除</title>") >= 0:
            error_count += 1
            if error_count >= ERROR_PAGE_COUNT_CHECK:
                # Too many deleted albums in a row: assume we ran off the end.
                # Rewind album_id to the first deleted one so it is re-checked next run.
                log.error("连续%s页相册没有图片,退出程序" % ERROR_PAGE_COUNT_CHECK)
                album_id -= error_count - 1
                break
            else:
                log.error("第%s页相册已被删除" % album_id)
                album_id += 1
                continue
        # A live album resets the consecutive-error counter.
        error_count = 0
        # --- image download ---
        if self.is_download_image and album_page.find('<input type="hidden" id="imageList"') >= 0:
            total_photo_count = tool.find_sub_string(album_page, '<input type="hidden" id="totalPageNum" value=', ' />')
            if not total_photo_count:
                log.error("第%s页图片数量解析失败" % album_id)
                break
            total_photo_count = int(total_photo_count)
            # Full list of image addresses on the page.
            image_url_list = get_image_url_list(album_page)
            if image_url_list is None:
                log.error("第%s页图片地址列表解析失败" % album_id)
                break
            if len(image_url_list) == 0:
                log.error("第%s页没有获取到图片" % album_id)
                break
            # A paid album may legitimately expose one image fewer than advertised.
            is_fee = False
            if len(image_url_list) != total_photo_count:
                album_reward_find = re.findall('<input type="hidden" id="rewardAmount" value="(\d*)">', album_page)
                if len(album_reward_find) == 1:
                    album_reward = int(album_reward_find[0])
                    if album_reward > 0 and total_photo_count - len(image_url_list) <= 1:
                        is_fee = True
            if not is_fee:
                log.error("第%s页解析获取的图片数量不符" % album_id)
                # break  # NOTE(review): deliberately disabled — mismatch is logged but not fatal.
            image_path = os.path.join(self.image_download_path, "%04d" % album_id)
            if not tool.make_dir(image_path, 0):
                log.error("创建图片下载目录 %s 失败" % image_path)
                break
            image_count = 1
            for image_url in image_url_list:
                # Strip the "@..." suffix that applies a blur effect to the image.
                image_url = str(image_url).split("@")[0]
                log.step("开始下载第%s页第%s张图片 %s" % (album_id, image_count, image_url))
                image_file_path = os.path.join(image_path, "%04d.jpg" % image_count)
                try:
                    if tool.save_net_file(image_url, image_file_path, True):
                        log.step("第%s页第%s张图片下载成功" % (album_id, image_count))
                        image_count += 1
                    else:
                        log.error("第%s页第%s张图片 %s 下载失败" % (album_id, image_count, image_url))
                except SystemExit:
                    # Early exit requested: drop the partial album directory.
                    log.step("提前退出")
                    tool.remove_dir(image_path)
                    is_over = True
                    break
            total_image_count += image_count - 1
        # --- video download ---
        # NOTE(review): this section is gated on self.is_download_image, same as the
        # image section above — looks like a copy-paste; a video-specific flag
        # (e.g. self.is_download_video) was probably intended. Confirm before changing.
        if self.is_download_image and album_page.find('<input type="hidden" id="VideoUrl"') >= 0:
            video_url = get_video_url(album_page)
            log.step("开始下载第%s页视频 %s" % (album_id, video_url))
            video_title = robot.filter_text(tool.find_sub_string(album_page, "<title>", "</title>"))
            file_type = video_url.split(".")[-1]
            video_file_path = os.path.join(self.video_download_path, "%s %s.%s" % (album_id, video_title, file_type))
            try:
                if tool.save_net_file(video_url, video_file_path, True):
                    log.step("第%s页视频下载成功" % album_id)
                    total_video_count += 1
                else:
                    log.error("第%s页视频 %s 下载失败" % (album_id, video_url))
            except SystemExit:
                log.step("提前退出")
                is_over = True
        if not is_over:
            album_id += 1
    # Persist the album id reached, creating the save directory if needed.
    save_data_dir = os.path.dirname(self.save_data_path)
    if not os.path.exists(save_data_dir):
        tool.make_dir(save_data_dir, 0)
    save_file = open(self.save_data_path, "w")
    save_file.write(str(album_id))
    save_file.close()
    log.step("全部下载完毕,耗时%s秒,共计图片%s张,视频%s个" % (self.get_run_time(), total_image_count, total_video_count))
def run(self):
    """Worker thread: download all new albums (posts) for one account.

    Walks the account's post list newest-first, stopping at the post id stored
    in self.account_info[1] from the previous run, then records the newest
    post id as the new archive point.
    """
    global TOTAL_IMAGE_COUNT
    account_name = self.account_info[0]
    try:
        log.step(account_name + " 开始")
        # A purely numeric account name is already a site id.
        if account_name.isdigit():
            site_id = account_name
        else:
            site_id = get_site_id(account_name)
        if site_id is None:
            log.error(account_name + " 主页无法访问")
            tool.process_exit()
        if not site_id:
            log.error(account_name + " site id解析失败")
            tool.process_exit()
        image_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
        this_account_total_image_count = 0
        post_count = 0
        first_post_id = "0"
        # Pagination cursor: posts published before this time are fetched next.
        post_time = "2016-11-16 14:12:00"
        is_over = False
        while not is_over:
            # One page of album (post) summaries.
            post_info_list = get_one_page_post_info_list(site_id, post_time)
            if post_info_list is None:
                log.error(account_name + " 相册信息列表无法访问")
                tool.process_exit()
            # An empty page means everything has been fetched.
            if len(post_info_list) == 0:
                break
            for post_info in post_info_list:
                if not robot.check_sub_key(("title", "post_id", "published_at", "images"), post_info):
                    log.error(account_name + " 相册信息解析失败:%s" % post_info)
                    continue
                post_id = str(post_info["post_id"])
                # Reached the archive point from the previous run — stop.
                if int(post_id) <= int(self.account_info[1]):
                    is_over = True
                    break
                # The very first (newest) post id becomes the new archive record.
                if first_post_id == "0":
                    first_post_id = post_id
                # Strip characters the filesystem can't handle from the title.
                title = robot.filter_text(post_info["title"])
                if title:
                    post_path = os.path.join(image_path, "%s %s" % (post_id, title))
                else:
                    post_path = os.path.join(image_path, post_id)
                if not tool.make_dir(post_path, 0):
                    # Directory creation failed — retry without the title; if that
                    # also fails, give up on this account.
                    log.error(account_name + " 创建相册目录 %s 失败,尝试不使用title" % post_path)
                    post_path = os.path.join(image_path, post_id)
                    if not tool.make_dir(post_path, 0):
                        log.error(account_name + " 创建相册目录 %s 失败" % post_path)
                        tool.process_exit()
                image_count = 0
                for image_info in post_info["images"]:
                    image_count += 1
                    if not robot.check_sub_key(("img_id",), image_info):
                        log.error(account_name + " 相册%s 第%s张图片解析失败" % (post_id, image_count))
                        continue
                    image_url = generate_large_image_url(site_id, image_info["img_id"])
                    log.step(account_name + " 相册%s 开始下载第%s张图片 %s" % (post_id, image_count, image_url))
                    file_path = os.path.join(post_path, "%s.jpg" % image_count)
                    if tool.save_net_file(image_url, file_path):
                        log.step(account_name + " 相册%s 第%s张图片下载成功" % (post_id, image_count))
                    else:
                        log.error(account_name + " 相册%s 第%s张图片 %s 下载失败" % (post_info["post_id"], image_count, image_url))
                # NOTE(review): image_count counts attempted images (including
                # failures), unlike sibling downloaders that count successes only.
                this_account_total_image_count += image_count
                if not is_over:
                    # NOTE(review): post_count increments once per *post*, yet is
                    # compared against GET_PAGE_COUNT (a page limit) — confirm intent.
                    if 0 < GET_PAGE_COUNT < post_count:
                        is_over = True
                    else:
                        # Advance the pagination cursor to this post's publish time.
                        post_time = post_info["published_at"]
                        post_count += 1
        log.step(account_name + " 下载完毕,总共获得%s张图片" % this_account_total_image_count)
        # Record the new archive point.
        if first_post_id != "0":
            self.account_info[1] = first_post_id
        # Persist the final account state.
        tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
        self.thread_lock.acquire()
        TOTAL_IMAGE_COUNT += this_account_total_image_count
        ACCOUNTS.remove(account_name)
        self.thread_lock.release()
        log.step(account_name + " 完成")
    except SystemExit, se:
        if se.code == 0:
            log.step(account_name + " 提前退出")
        else:
            log.error(account_name + " 异常退出")
def run(self): global TOTAL_IMAGE_COUNT coser_id = self.account_info[0] if len(self.account_info) >= 3: cn = self.account_info[2] else: cn = self.account_info[0] try: log.step(cn + " 开始") image_path = os.path.join(IMAGE_DOWNLOAD_PATH, cn) # 图片下载 this_cn_total_image_count = 0 page_count = 1 total_rp_count = 1 first_rp_id = "" unique_list = [] is_over = False need_make_download_dir = True # 是否需要创建cn目录 while not is_over: # 获取一页的作品信息 post_page = get_one_page_post(coser_id, page_count) if post_page is None: log.error(cn + " 无法访问第%s页作品" % page_count) tool.process_exit() # 解析作品信息,获取所有的正片信息 cp_id, rp_list = get_rp_list(post_page) if cp_id is None: log.error(cn + " 第%s页作品解析异常" % page_count) tool.process_exit() for rp_id, title in rp_list.iteritems(): # 检查是否已下载到前一次的图片 if int(rp_id) <= int(self.account_info[1]): is_over = True break # 新增正片导致的重复判断 if rp_id in unique_list: continue else: unique_list.append(rp_id) # 将第一个作品的id做为新的存档记录 if first_rp_id == "": first_rp_id = rp_id log.trace("rp: " + rp_id) if need_make_download_dir: if not tool.make_dir(image_path, 0): log.error(cn + " 创建CN目录 %s 失败" % image_path) tool.process_exit() need_make_download_dir = False # 过滤标题中不支持的字符 title = robot.filter_text(title) if title: rp_path = os.path.join(image_path, "%s %s" % (rp_id, title)) else: rp_path = os.path.join(image_path, rp_id) if not tool.make_dir(rp_path, 0): # 目录出错,把title去掉后再试一次,如果还不行退出 log.error(cn + " 创建作品目录 %s 失败,尝试不使用title" % rp_path) rp_path = os.path.join(image_path, rp_id) if not tool.make_dir(rp_path, 0): log.error(cn + " 创建作品目录 %s 失败" % rp_path) tool.process_exit() # 获取正片页面内的所有图片地址列表 image_url_list = get_image_url_list(cp_id, rp_id) if image_url_list is None: log.error(cn + " 无法访问正片:%s,cp_id:%s" % (rp_id, cp_id)) continue if len(image_url_list) == 0 and IS_AUTO_FOLLOW: log.step(cn + " 检测到可能有私密作品且账号不是ta的粉丝,自动关注") if follow(coser_id): # 重新获取下正片页面内的所有图片地址列表 image_url_list = get_image_url_list(cp_id, rp_id) if len(image_url_list) == 0: log.error(cn + " 
正片:%s没有任何图片,可能是你使用的账号没有关注ta,所以无法访问只对粉丝开放的私密作品,cp_id:%s" % (rp_id, cp_id)) continue image_count = 1 for image_url in list(image_url_list): # 禁用指定分辨率 image_url = "/".join(image_url.split("/")[0:-1]) log.step(cn + " %s 开始下载第%s张图片 %s" % (rp_id, image_count, image_url)) if image_url.rfind("/") < image_url.rfind("."): file_type = image_url.split(".")[-1] else: file_type = "jpg" file_path = os.path.join(rp_path, "%03d.%s" % (image_count, file_type)) if tool.save_net_file(image_url, file_path): image_count += 1 log.step(cn + " %s 第%s张图片下载成功" % (rp_id, image_count)) else: log.error(cn + " %s 第%s张图片 %s 下载失败" % (rp_id, image_count, image_url)) this_cn_total_image_count += image_count - 1 if 0 < GET_PAGE_COUNT < total_rp_count: is_over = True break else: total_rp_count += 1 if not is_over: if page_count >= get_max_page_count(coser_id, post_page): is_over = True else: page_count += 1 log.step(cn + " 下载完毕,总共获得%s张图片" % this_cn_total_image_count) # 新的存档记录 if first_rp_id != "": self.account_info[1] = first_rp_id # 保存最后的信息 tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH) self.thread_lock.acquire() TOTAL_IMAGE_COUNT += this_cn_total_image_count ACCOUNTS.remove(coser_id) self.thread_lock.release() log.step(cn + " 完成") except SystemExit: log.error(cn + " 异常退出") except Exception, e: log.error(cn + " 未知异常") log.error(str(e) + "\n" + str(traceback.format_exc()))
def run(self): global TOTAL_VIDEO_COUNT account_id = self.account_info[0] if len(self.account_info) >= 3 and self.account_info[2]: account_name = self.account_info[2] else: account_name = self.account_info[0] try: log.step(account_name + " 开始") # 如果需要重新排序则使用临时文件夹,否则直接下载到目标目录 if IS_SORT: video_path = os.path.join(VIDEO_TEMP_PATH, account_name) else: video_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name) # 获取视频信息列表 video_info_list = get_video_info_list(account_id) if video_info_list is None: log.error(account_name + " 视频列表获取失败") tool.process_exit() video_count = 1 first_video_id = "0" need_make_video_dir = True for video_info in video_info_list: if not robot.check_sub_key(("item_data",), video_info) or \ not robot.check_sub_key(("watch_id", "title"), video_info["item_data"]): log.error(account_name + " 视频信息%s解析失败" % video_info) tool.process_exit() # sm30043563 video_id = str(video_info["item_data"]["watch_id"]) # 过滤标题中不支持的字符 video_title = robot.filter_text(video_info["item_data"]["title"]) # 第一个视频,创建目录 if need_make_video_dir: if not tool.make_dir(video_path, 0): log.error(account_name + " 创建图片下载目录 %s 失败" % video_path) tool.process_exit() need_make_video_dir = False # 获取视频下载地址 video_url = get_video_url(video_id) log.step(account_name + " 开始下载第%s个视频 %s %s" % (video_count, video_id, video_url)) print video_title print "%s %s" % (video_id, video_title) file_path = os.path.join(video_path, "%s %s.mp4" % (video_id, video_title)) if tool.save_net_file(video_url, file_path): log.step(account_name + " 第%s个视频下载成功" % video_count) video_count += 1 else: log.error(account_name + " 第%s个视频 %s %s 下载失败" % (video_count, video_id, video_url)) log.step(account_name + " 下载完毕,总共获得%s个视频" % (video_count - 1)) # 排序 if IS_SORT: if first_video_id != "0": destination_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name) if robot.sort_file(video_path, destination_path, int(self.account_info[3]), 4): log.step(account_name + " 视频从下载目录移动到保存目录成功") else: log.error(account_name + " 创建视频保存目录 %s 
失败" % destination_path) tool.process_exit() # 新的存档记录 if first_video_id != "0": self.account_info[1] = first_video_id # 保存最后的信息 tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH) self.thread_lock.acquire() TOTAL_VIDEO_COUNT += video_count - 1 ACCOUNTS.remove(account_id) self.thread_lock.release() log.step(account_name + " 完成") except SystemExit, se: if se.code == 0: log.step(account_name + " 提前退出") else: log.error(account_name + " 异常退出")
def run(self):
    """Worker thread: download all new weibo-article images for one account.

    Walks the account's article preview pages, stopping at the publish time
    stored in self.account_info[1] from the previous run, then records the
    newest article's publish time as the new archive point.
    """
    global TOTAL_IMAGE_COUNT
    account_id = self.account_info[0]
    # Prefer the display name when the save record carries one.
    if len(self.account_info) >= 3 and self.account_info[2]:
        account_name = self.account_info[2]
    else:
        account_name = self.account_info[0]
    try:
        log.step(account_name + " 开始")
        # Resolve the page_id that the article listing API needs.
        account_page_id = get_account_page_id(account_id)
        if account_page_id is None:
            log.error(account_name + " 微博主页没有获取到page_id")
            tool.process_exit()
        page_count = 1
        this_account_total_image_count = 0
        first_article_time = "0"
        is_over = False
        image_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
        while not is_over:
            # One page of article previews.
            preview_article_page = get_one_page_preview_article_data(account_page_id, page_count)
            if preview_article_page is None:
                log.error(account_name + " 第%s页文章获取失败" % page_count)
                tool.process_exit()
            # Split the preview page into per-article chunks.
            preview_article_data_list = get_preview_article_data_list(preview_article_page)
            if len(preview_article_data_list) == 0:
                log.error(account_name + " 第%s页文章解析失败,页面:%s" % (page_count, preview_article_page))
                tool.process_exit()
            for preview_article_data in preview_article_data_list:
                # Publish time of this article.
                article_time = get_article_time(preview_article_data)
                if article_time is None:
                    log.error(account_name + " 预览 %s 中的文章发布时间解析失败" % preview_article_data)
                    continue
                # Reached the archive point from the previous run — stop.
                if article_time <= int(self.account_info[1]):
                    is_over = True
                    break
                # The first (newest) article's time becomes the new archive record.
                if first_article_time == "0":
                    first_article_time = str(article_time)
                # Article URL from the preview chunk.
                article_url = get_article_url(preview_article_data)
                if article_url is None:
                    log.error(account_name + " 预览 %s 中的文章地址解析失败" % preview_article_data)
                    continue
                # Article id parsed out of the URL.
                # NOTE(review): article_id is indexed ([0]) below yet also joined
                # directly into paths — presumably a tuple/list; confirm its type.
                article_id = get_article_id(article_url)
                if article_id is None:
                    log.error(account_name + " 文章地址 %s 解析文章id失败" % article_url)
                    continue
                # Fetch the article page, following redirects.
                article_page = auto_redirect_visit(article_url)
                if not article_page:
                    log.error(account_name + " 文章 %s 获取失败" % article_url)
                    continue
                # Article title, filtered of unsupported filesystem characters.
                title = get_article_title(article_page, article_id[0])
                title = robot.filter_text(title)
                if title:
                    article_path = os.path.join(image_path, "%s %s" % (article_id, title))
                else:
                    article_path = os.path.join(image_path, article_id)
                if not tool.make_dir(article_path, 0):
                    # Retry without the title; if that also fails, give up.
                    log.error(account_name + " 创建文章目录 %s 失败,尝试不使用title" % article_path)
                    article_path = os.path.join(image_path, article_id)
                    if not tool.make_dir(article_path, 0):
                        log.error(account_name + " 创建文章目录 %s 失败" % article_path)
                        tool.process_exit()
                # Header (top) picture of the article, saved as 0000.<ext>.
                top_picture_url = get_article_top_picture_url(article_page)
                if top_picture_url:
                    log.step(account_name + " %s 开始下载顶部图片 %s" % (title, top_picture_url))
                    file_type = top_picture_url.split(".")[-1]
                    file_path = os.path.join(article_path, "0000.%s" % file_type)
                    if tool.save_net_file(top_picture_url, file_path):
                        log.step(account_name + " %s 顶部图片下载成功" % title)
                        this_account_total_image_count += 1
                    else:
                        log.error(account_name + " %s 顶部图片 %s 下载失败" % (title, top_picture_url))
                # Image addresses in the article body.
                image_url_list = get_article_image_url_list(article_page, article_id[0])
                if image_url_list is None:
                    log.error(account_name + " 文章 %s 正文解析失败" % article_url)
                    continue
                image_count = 1
                for image_url in list(image_url_list):
                    # Skip site-furniture images hosted on e.weibo.com.
                    if image_url.find("/p/e_weibo_com") >= 0 or image_url.find("e.weibo.com") >= 0:
                        continue
                    log.step(account_name + " %s 开始下载第%s张图片 %s" % (title, image_count, image_url))
                    file_type = image_url.split(".")[-1]
                    file_path = os.path.join(article_path, "%s.%s" % (image_count, file_type))
                    if tool.save_net_file(image_url, file_path):
                        log.step(account_name + " %s 第%s张图片下载成功" % (title, image_count))
                        image_count += 1
                    else:
                        log.error(account_name + " %s 第%s张图片 %s 下载失败" % (title, image_count, image_url))
                if image_count > 1:
                    this_account_total_image_count += image_count - 1
            if not is_over:
                # Stop once the last listing page has been processed.
                if page_count >= get_max_page_count(preview_article_page):
                    is_over = True
                else:
                    page_count += 1
        log.step(account_name + " 下载完毕,总共获得%s张图片" % this_account_total_image_count)
        # Record the new archive point.
        if first_article_time != "0":
            self.account_info[1] = first_article_time
        # Persist the final account state.
        tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
        self.thread_lock.acquire()
        TOTAL_IMAGE_COUNT += this_account_total_image_count
        ACCOUNTS.remove(account_id)
        self.thread_lock.release()
        log.step(account_name + " 完成")
    except SystemExit, se:
        if se.code == 0:
            log.step(account_name + " 提前退出")
        else:
            log.error(account_name + " 异常退出")
def run(self):
    """Worker thread: download all new songs (originals and covers) for one
    account.

    For each audio type the loop stops at the audio id stored in the
    corresponding slot of self.account_info, then records the newest audio id
    back into that slot as the new archive point.
    """
    global TOTAL_VIDEO_COUNT
    global GET_PAGE_COUNT
    account_id = self.account_info[0]
    # Prefer the display name when the save record carries one.
    if len(self.account_info) >= 4 and self.account_info[3]:
        account_name = self.account_info[3]
    else:
        account_name = self.account_info[0]
    # Audio types: yc = originals, fc = covers; the value is also the index of
    # that type's archive slot in self.account_info.
    audio_type_to_index = {"yc": 1, "fc": 2}
    try:
        log.step(account_name + " 开始")
        # Running song counter shared across both audio types.
        video_count = 1
        for audio_type in audio_type_to_index.keys():
            video_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name, audio_type)
            page_count = 1
            first_audio_id = "0"
            # Guards against the same song appearing twice when new uploads shift pages.
            unique_list = []
            is_over = False
            need_make_download_dir = True  # create the directory lazily
            while not is_over:
                # One page of song summaries for this audio type.
                audio_list = get_one_page_audio_list(account_id, audio_type, page_count)
                if audio_list is None:
                    log.step(account_name + " 第%s页%s歌曲页面获取失败" % (page_count, audio_type))
                    # Archive restore: discard any id recorded so far so the
                    # archive is not advanced past songs we failed to list.
                    first_audio_id = "0"
                    break
                # An empty page means everything has been fetched.
                if len(audio_list) == 0:
                    break
                for audio_info in list(audio_list):
                    audio_id = audio_info[0]
                    # Strip characters the filesystem can't handle from the title.
                    audio_title = robot.filter_text(audio_info[1])
                    # Reached the archive point from the previous run — stop.
                    if int(audio_id) <= int(self.account_info[audio_type_to_index[audio_type]]):
                        is_over = True
                        break
                    # Skip duplicates caused by new uploads shifting page boundaries.
                    if audio_id in unique_list:
                        continue
                    else:
                        unique_list.append(audio_id)
                    # The first (newest) audio id becomes the new archive record.
                    if first_audio_id == "0":
                        first_audio_id = str(audio_id)
                    # Resolve the actual download address.
                    audio_url = get_audio_url(audio_id, audio_type_to_index[audio_type])
                    if audio_url is None:
                        log.step(account_name + " %s歌曲ID %s,下载地址获取失败" % (audio_type, audio_id))
                        continue
                    if not audio_url:
                        log.step(account_name + " %s歌曲ID %s,暂不提供下载地址" % (audio_type, audio_id))
                        continue
                    log.step(account_name + " 开始下载第%s首歌曲 %s" % (video_count, audio_url))
                    # First song: create the download directory.
                    if need_make_download_dir:
                        if not tool.make_dir(video_path, 0):
                            log.error(account_name + " 创建歌曲下载目录 %s 失败" % video_path)
                            tool.process_exit()
                        need_make_download_dir = False
                    file_path = os.path.join(video_path, "%s - %s.mp3" % (audio_id, audio_title))
                    if tool.save_net_file(audio_url, file_path):
                        log.step(account_name + " 第%s首歌曲下载成功" % video_count)
                        video_count += 1
                    else:
                        log.error(account_name + " 第%s首歌曲 %s 下载失败" % (video_count, audio_url))
                    # Configured download-count limit reached — stop.
                    if 0 < GET_VIDEO_COUNT < video_count:
                        is_over = True
                        break
                if not is_over:
                    # Configured page limit reached — stop.
                    if 0 < GET_PAGE_COUNT <= page_count:
                        is_over = True
                    # Fewer songs than one full page means we hit the end.
                    # (If the count is an exact multiple of the page size, the
                    # next, empty page terminates the loop instead.)
                    elif len(audio_list) < 20:
                        is_over = True
                    else:
                        page_count += 1
            # Record the new archive point for this audio type.
            if first_audio_id != "0":
                self.account_info[audio_type_to_index[audio_type]] = first_audio_id
        log.step(account_name + " 下载完毕,总共获得%s首歌曲" % (video_count - 1))
        # Persist the final account state.
        tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
        self.thread_lock.acquire()
        TOTAL_VIDEO_COUNT += video_count - 1
        ACCOUNTS.remove(account_id)
        self.thread_lock.release()
        log.step(account_name + " 完成")
    except SystemExit, se:
        if se.code == 0:
            log.step(account_name + " 提前退出")
        else:
            log.error(account_name + " 异常退出")
def main(self):
    """Crawl zunguang.com albums sequentially by page number, downloading each
    album's images and persisting the last page number to the save file so the
    next run resumes where this one stopped.

    Fix: when creating the album directory (with title) failed, the retry
    previously did os.path.join(image_path, page_count) — joining an int
    (which raises) and nesting under the directory that had just failed.
    The retry now rebuilds the directory path without the title, matching the
    sibling downloaders, and the final error message reports the path that
    actually failed.
    """
    # Parse the save file to recover the page number reached by the previous run.
    page_count = 1
    if os.path.exists(self.save_data_path):
        save_file = open(self.save_data_path, "r")
        save_info = save_file.read()
        save_file.close()
        page_count = int(save_info.strip())
    total_image_count = 0
    # Consecutive "album deleted" pages seen; used to detect the end of the site.
    error_count = 0
    is_over = False
    while not is_over:
        album_status, album_data = get_one_page_album_data(page_count)
        if album_status == -1:
            log.error("第%s页相册获取失败" % page_count)
            break
        elif album_status == -2:
            log.error("第%s页相册解析失败" % page_count)
            break
        elif album_status == 2:
            # Deleted album: tolerate a few in a row, then assume end-of-site
            # and rewind page_count so the first deleted page is re-checked.
            error_count += 1
            if error_count >= ERROR_PAGE_COUNT_CHECK:
                log.error("连续%s页相册没有图片,退出程序" % ERROR_PAGE_COUNT_CHECK)
                page_count -= error_count - 1
                break
            else:
                log.error("第%s页相册已被删除" % page_count)
                page_count += 1
                continue
        elif album_status == 3:
            # Music album — nothing to download here.
            log.error("第%s页歌曲相册" % page_count)
            page_count += 1
            continue
        elif album_status == 4:
            log.error("第%s页相册未知相册类型%s" % (page_count, album_data))
            break
        # A usable album resets the consecutive-error counter.
        error_count = 0
        # Directory title, filtered of unsupported filesystem characters.
        title = ""
        if album_data["title"]:
            title = robot.filter_text(str(album_data["title"].encode("utf-8")))
        if title:
            image_path = os.path.join(self.image_download_path, "%04d %s" % (page_count, title))
        else:
            image_path = os.path.join(self.image_download_path, "%04d" % page_count)
        if not tool.make_dir(image_path, 0):
            # Directory creation failed — retry with the title-less name; if
            # that also fails, give up.
            log.error("第%s页创建相册目录 %s 失败,尝试不使用title" % (page_count, image_path))
            image_path = os.path.join(self.image_download_path, "%04d" % page_count)
            if not tool.make_dir(image_path, 0):
                log.error("第%s页创建相册目录 %s 失败" % (page_count, image_path))
                tool.process_exit()
        image_count = 1
        for image_data in album_data["attr"]["img"]:
            image_url = "http://www.zunguang.com/%s" % str(image_data["url"])
            log.step("开始下载第%s页第%s张图片 %s" % (page_count, image_count, image_url))
            file_type = image_url.split(".")[-1]
            file_path = os.path.join(image_path, "%03d.%s" % (image_count, file_type))
            try:
                if tool.save_net_file(image_url, file_path, True):
                    log.step("第%s页第%s张图片下载成功" % (page_count, image_count))
                    image_count += 1
                else:
                    log.error("第%s页第%s张图片 %s 下载失败" % (page_count, image_count, image_url))
            except SystemExit:
                # Early exit requested: drop the partial album directory.
                log.step("提前退出")
                tool.remove_dir(image_path)
                is_over = True
                break
        if not is_over:
            total_image_count += image_count - 1
            page_count += 1
    # Persist the page number reached, creating the save directory if needed.
    save_data_dir = os.path.dirname(self.save_data_path)
    if not os.path.exists(save_data_dir):
        tool.make_dir(save_data_dir, 0)
    save_file = open(self.save_data_path, "w")
    save_file.write(str(page_count))
    save_file.close()
    log.step("全部下载完毕,耗时%s秒,共计图片%s张" % (self.get_run_time(), total_image_count))