def run(self):
    """
    Worker entry point: ensure the target directory exists, download one
    pixivision topic into it, then fire the success/fail callbacks.
    """
    if not os.path.exists(self.path):
        try:
            os.makedirs(self.path)
        except Exception as e:
            error_log("make dir Fail:" + self.path)
            error_log(e)
            return
    try:
        path = ImageDownload.download_topics(self.url, self.path,
                                             create_path=self.create_path,
                                             downloader=self.downloader)
        if self.success:
            self.success(CommonUtils.build_callback_msg(path, url=self.url))
            # When batch bookkeeping is supplied, report overall completion
            # once the final task of the batch finishes.
            # NOTE: dict.has_key() was removed in Python 3; use `in`.
            if self.callback_params and 'current_count' in self.callback_params \
                    and 'all_count' in self.callback_params:
                current_count = self.callback_params['current_count'].getAndInc()
                if self.callback_params['all_count'] == (current_count + 1):
                    self.success("Download from Pixivision:\n" +
                                 self.callback_params['url'] +
                                 "\nAll tasks are complete!\n\n")
    except Exception as e:
        print("Download topics fail")
        print(e)
        if self.fail:
            self.fail()
def download_illustration(self, illu, path, p_limit=0):
    """
    Download an illustration described by a parsed pixivision entry.

    :param illu: parsed illustration info (expects "url" and "title" keys)
    :param path: download directory
    :param p_limit: page-count limit; 0 means unlimited
    :return: download path, PAGE_LIMIT_CONTINUE when skipped, or None
    """
    # NOTE: dict.has_key() was removed in Python 3; use the `in` operator.
    if "url" in illu and "title" in illu:
        illust_id = CommonUtils.get_url_param(illu.url, "illust_id")
        detail = self.api.illust_detail(illust_id)
        if detail:
            try:
                detail = detail.illust
                # Single-page illustration
                if detail.page_count == 1:
                    try:
                        url = detail.meta_single_page.original_image_url
                    except Exception:
                        url = detail.image_urls.large
                    path = self.download(illust_id, path, url)
                # Multi-page illustration
                else:
                    if 0 < p_limit < detail.page_count:
                        # Page count exceeds the limit; skip this illustration.
                        print("Pixiv id:%s, P>limit,Skip download" % (illust_id, ))
                        return PAGE_LIMIT_CONTINUE
                    urls = detail.meta_pages
                    # Got the individual pages
                    if len(urls) > 1:
                        # Put all pages into one folder.
                        path += "/p_%s" % illust_id
                        if not os.path.exists(path):
                            os.mkdir(path)
                        for index in range(len(urls)):
                            try:
                                url = urls[index].image_urls.original if \
                                    "original" in urls[index].image_urls else urls[index].image_urls.large
                                extension = os.path.splitext(url)[1]
                                save_path = path + "/p_%s_%d%s" % (
                                    illust_id, index, extension)
                                print(save_path)
                                self.api.download(url, path=save_path)
                            except Exception:
                                continue
                        path = path + "/"
                    else:
                        # Failed to list pages; fall back to the large preview.
                        url = detail.image_urls.large
                        path = self.download(illust_id, path, url)
                return path
            except Exception as e:
                error_log("Download fail:")
                error_log(e)
        else:
            print(illu.title + " can't get detail id :" + illust_id)
    else:
        return
def run(self):
    """
    Ensure the target directory exists.

    Creates ``self.path`` (including parents) when missing; on failure the
    error is logged and the method returns early.
    """
    if not os.path.exists(self.path):
        try:
            os.makedirs(self.path)
        # BUG FIX: `except Exception, e` is Python 2-only syntax
        # (SyntaxError on Python 3); `as e` works on both.
        except Exception as e:
            error_log("make dir Fail:" + self.path)
            error_log(e)
            return
def download_all_by_id(self, illust_id, path, p_limit=0):
    """
    Download an illustration by pixiv id.

    :param illust_id: pixiv illust id
    :param path: download directory
    :param p_limit: page-count limit; 0 means unlimited
    :return: download path, or PAGE_LIMIT_CONTINUE when skipped
    """
    detail = self.api.illust_detail(illust_id)
    if detail:
        try:
            detail = detail.illust
            # Single-page illustration
            if detail.page_count == 1:
                try:
                    url = detail.meta_single_page.original_image_url
                except Exception:
                    url = detail.image_urls.large
                extension = os.path.splitext(url)[1]
                save_path = path + "/p_%s%s" % (illust_id, extension)
                print("Downloading:" + save_path)
                path = self.api.download(url, path=save_path)
            # Multi-page illustration
            else:
                if 0 < p_limit < detail.page_count:
                    # Page count exceeds the limit; skip this illustration.
                    print("Pixiv id:%s P>limit,Skip download" % (illust_id, ))
                    return PAGE_LIMIT_CONTINUE
                urls = detail.meta_pages
                # Got the individual pages
                if len(urls) > 1:
                    # Put all pages into one folder.
                    path += "/p_%s" % illust_id
                    if not os.path.exists(path):
                        os.mkdir(path)
                    for index in range(len(urls)):
                        try:
                            # NOTE: has_key() was removed in Python 3; use `in`.
                            url = urls[index].image_urls.original if \
                                "original" in urls[index].image_urls else urls[index].image_urls.large
                            extension = os.path.splitext(url)[1]
                            save_path = path + "/p_%s_%d%s" % (
                                illust_id, index, extension)
                            print("Downloading:" + save_path)
                            self.api.download(url, path=save_path)
                        except Exception:
                            continue
                    path = path + "/"
                else:
                    # Failed to list pages; fall back to the large preview.
                    url = detail.image_urls.large
                    path = self.api.download(url, prefix=path)
            return path
        except Exception as e:
            error_log("Download fail:" + str(detail))
            error_log(e)
    else:
        print(" can't get detail id :" + str(illust_id))
def download_illustration(illu, path, auth_api):
    """
    Download one illustration parsed from a pixivision page.

    :param illu: parsed illustration info (expects "url" and "title" keys)
    :param path: download directory
    :param auth_api: authenticated download client
    """
    # NOTE: dict.has_key() was removed in Python 3; use the `in` operator.
    if "url" in illu and "title" in illu:
        illust_id = CommonUtils.get_url_param(illu.url, "illust_id")
        detail = PixivApi.illust_detail(illust_id)
        if detail:
            try:
                detail = detail.illust
                # Single-page illustration
                if detail.page_count == 1:
                    try:
                        url = detail.meta_single_page.original_image_url
                    # Narrowed from a bare `except:` which would also
                    # swallow KeyboardInterrupt/SystemExit.
                    except Exception:
                        url = detail.image_urls.large
                    download(illust_id, illu.title, path, url, auth_api)
                # Multi-page illustration
                else:
                    if detail.page_count > P_LIMIT:
                        # Page count exceeds the limit; skip this illustration.
                        print("Pixiv id:%s,name:%s P>limit,Skip download" % (illust_id, illu.title))
                        return
                    urls = detail.meta_pages
                    # Got the individual pages
                    if len(urls) > 1:
                        # Put all pages into one folder.
                        path += "/p_%s" % illust_id
                        if not os.path.exists(path):
                            os.mkdir(path)
                        for index in range(len(urls)):
                            try:
                                url = urls[index].image_urls.original if \
                                    "original" in urls[index].image_urls else urls[index].image_urls.large
                                extension = os.path.splitext(url)[1]
                                if IMAGE_USE_ORG_NAME:
                                    save_path = path + "/p_%s_%s_%d%s" % (
                                        illust_id, CommonUtils.filter_dir_name(illu.title), index, extension)
                                else:
                                    save_path = path + "/p_%s_%d%s" % (illust_id, index, extension)
                                print(save_path)
                                auth_api.download(url, path=save_path)
                            except Exception:
                                continue
                    else:
                        # Failed to list pages; fall back to the large preview.
                        url = detail.image_urls.large
                        download(illust_id, illu.title, path, url, auth_api)
            # BUG FIX: `except Exception, e` is Python 2-only syntax.
            except Exception as e:
                error_log("Download fail:")
                error_log(e)
        else:
            print(illu.title + " can't get detail id :" + illust_id)
def handler_data(self, illu):
    """
    Handle one queued message: a json string that must contain "url" and
    may contain "path"; downloads the image via the pixiv api client.
    """
    print(illu)
    illu = json.loads(illu)
    # NOTE: dict.has_key() was removed in Python 3; use the `in` operator.
    if "url" in illu:
        if "path" in illu:
            print("Download start" + illu["path"])
            self.pixiv_api.download(illu["url"], illu["path"])
        else:
            # No explicit save path; let the api pick a default.
            print("Download start" + illu["url"])
            self.pixiv_api.download(illu["url"])
    else:
        error_log("Error data:" + str(illu))
def download_by_detail(self, detail, path, p_limit=0):
    """
    Download from an illust detail object already fetched from the api.

    :param detail: illust detail object
    :param path: download directory
    :param p_limit: page-count limit; 0 means unlimited
    :return: download path, or PAGE_LIMIT_CONTINUE when skipped
    """
    if detail:
        try:
            illust_id = detail.id
            # Single-page illustration
            if detail.page_count == 1:
                try:
                    url = detail.meta_single_page.original_image_url
                except Exception:
                    url = detail.image_urls.large
                path = self.download(illust_id, path, url)
            # Multi-page illustration
            else:
                if 0 < p_limit < detail.page_count:
                    # Page count exceeds the limit; skip this illustration.
                    print("Pixiv id:%s P>limit,Skip download" % (illust_id, ))
                    return PAGE_LIMIT_CONTINUE
                urls = detail.meta_pages
                # Got the individual pages
                if len(urls) > 1:
                    # Put all pages into one folder.
                    path += "/p_%s" % illust_id
                    if not os.path.exists(path):
                        os.mkdir(path)
                    for index in range(len(urls)):
                        try:
                            # NOTE: has_key() was removed in Python 3; use `in`.
                            url = urls[index].image_urls.original if \
                                "original" in urls[index].image_urls else urls[index].image_urls.large
                            extension = os.path.splitext(url)[1]
                            save_path = path + "/p_%s_%d%s" % (
                                illust_id, index, extension)
                            print(save_path)
                            self.api.download(url, path=save_path)
                        except Exception:
                            continue
                    path = path + "/"
                else:
                    # Failed to list pages; fall back to the large preview.
                    url = detail.image_urls.large
                    path = self.download(illust_id, path, url)
            return path
        except Exception as e:
            error_log("Download fail:")
            error_log(e)
def download_all_by_id(illust_id, path, limit_p=True):
    """
    Download an illustration by pixiv id (module-level variant).

    :param illust_id: pixiv illust id
    :param path: download directory
    :param limit_p: when True, skip illustrations with more than P_LIMIT pages
    :return: download path, or None when skipped / detail unavailable
    """
    detail = PixivApi.illust_detail(illust_id)
    if detail:
        try:
            detail = detail.illust
            # Single-page illustration
            if detail.page_count == 1:
                try:
                    url = detail.meta_single_page.original_image_url
                # Narrowed from a bare `except:` which would also swallow
                # KeyboardInterrupt/SystemExit.
                except Exception:
                    url = detail.image_urls.large
                extension = os.path.splitext(url)[1]
                save_path = path + "/p_%s%s" % (illust_id, extension)
                print("Downloading:" + save_path)
                path = PixivApi.download(url, path=save_path)
            # Multi-page illustration
            else:
                if detail.page_count > P_LIMIT and limit_p:
                    # Page count exceeds the limit; skip this illustration.
                    print("Pixiv id:%s P>limit,Skip download" % (illust_id,))
                    return
                urls = detail.meta_pages
                # Got the individual pages
                if len(urls) > 1:
                    # Put all pages into one folder.
                    path += "/p_%s" % illust_id
                    if not os.path.exists(path):
                        os.mkdir(path)
                    for index in range(len(urls)):
                        try:
                            # NOTE: has_key() was removed in Python 3; use `in`.
                            url = urls[index].image_urls.original if \
                                "original" in urls[index].image_urls else urls[index].image_urls.large
                            extension = os.path.splitext(url)[1]
                            save_path = path + "/p_%s_%d%s" % (illust_id, index, extension)
                            print("Downloading:" + save_path)
                            PixivApi.download(url, path=save_path)
                        except Exception:
                            continue
                    path = path + "/"
                else:
                    # Failed to list pages; fall back to the large preview.
                    url = detail.image_urls.large
                    path = PixivApi.download(url, prefix=path)
            return path
        except Exception as e:
            # BUG FIX: was `"Download fail:" + detail`, which raises a
            # TypeError (str + api object) and masks the real exception;
            # the sibling method variant already uses str(detail).
            error_log("Download fail:" + str(detail))
            error_log(e)
    else:
        print(" can't get detail id :" + str(illust_id))
def run_by_pool():
    """Crawl every pixivision list page using a shared thread pool."""
    page_urls = [LINK_URL % page for page in range(1, PAGE_NUM + 1)]
    print(page_urls)
    # 5*20 -> at most 100 downloader threads running at once
    error_log("start:" + str(time.time()))
    pool = ThreadPool(minthreads=1, maxthreads=5)
    for page_url in page_urls:
        pool.callInThread(start, page_url, save_path=IMAGE_SAVE_BASEPATH)
    pool.start()
    # Poll the pool state every 20s; once no worker is busy, shut the
    # download process down.
    while True:
        time.sleep(20)
        if not pool.working:
            break
    pool.stop()
    error_log("end:" + str(time.time()))
def run_by_list():
    """Crawl the whole pixivision site, starting launchers in small batches."""
    error_log("start:" + str(time.time()))
    # Full-site pixivision illustration crawl.
    page_urls = [LINK_URL % page for page in range(1, PAGE_NUM + 1)]
    # Batch size: each round starts 2*20 image download workers. Tune to
    # the machine, though in practice network speed — not CPU or memory —
    # is the bottleneck.
    batch_size = 2
    for offset in range(0, len(page_urls), batch_size):
        batch = []
        for page_url in page_urls[offset:offset + batch_size]:
            print("Start " + page_url)
            batch.append(PixivisionLauncher(page_url, IMAGE_SAVE_BASEPATH))
        for launcher in batch:
            launcher.start()
        # Wait for the whole batch before starting the next one.
        for launcher in batch:
            launcher.join()
    error_log("end:" + str(time.time()))
def download_topics(cls, url, path, quality=1):
    """
    Download every illustration of one pixivision topic page.

    :param url: topic page url
    :param path: download directory
    :param quality: 1 -> resolve the original image via the pixiv api;
                    any other value downloads the pixivision preview image
    """
    html = HtmlDownloader.download(url)
    illu_list = HtmlDownloader.parse_illustration(html)
    title_des = HtmlDownloader.get_title(html)
    if title_des and illu_list:
        title_des["size"] = len(illu_list)
        CommonUtils.write_topic_des(path + "/topic.txt", title_des)
    if not illu_list:
        return
    for illu in illu_list:
        try:
            filename = CommonUtils.filter_dir_name(illu.title)
            extension = os.path.splitext(illu.image)[1]
            # Renamed from `id`, which shadowed the builtin.
            illust_id = CommonUtils.get_url_param(illu.image_page, "illust_id")
            if quality == 1:
                # Resolve the original image url through the pixiv api
                # and download the original.
                detail = PixivApi.illust_detail(illust_id)
                if detail:
                    download_url = ImageDownload.get_image_url(illu, detail)
                    if IMAGE_USE_ORG_NAME:
                        save_path = path + "/p_%s_%s%s" % (illust_id, filename, extension)
                    else:
                        save_path = path + "/p_%s%s" % (illust_id, extension)
                    print(save_path)
                    PixivApi.download(download_url, path=save_path)
                else:
                    print(illu.title + " can't get detail id :" + illust_id)
            else:
                # Download the preview image shown on pixivision directly.
                print(path + "/p_%s_%s%s" % (illust_id, filename, extension))
                PixivApi.download(illu.image,
                                  path=path + "/p_%s_%s%s" % (illust_id, filename, extension))
        # BUG FIX: `except Exception, e` is Python 2-only syntax.
        except Exception as e:
            error_log("Download Illu Fail:" + " Illustration :" + str(illu))
            error_log(e)
            continue
def get_pixivision_topics(cls, url, path):
    """
    Parse a pixivision list page and create one folder per topic.

    Each topic gets a directory named after its (filtered) title containing
    a topic.txt description; the directory path is stored back on the topic
    as 'save_path'.

    :param url: pixivision list page url
    :param path: base download directory
    """
    topic_list = HtmlDownloader.parse_illustration_topic(
        HtmlDownloader.download(url))
    if not topic_list:
        error_log(url + " not find any illustration topic")
        return
    for topic in topic_list:
        try:
            # Special characters must be filtered out of the title,
            # otherwise creating the folder fails.
            save_path = path + "/" + CommonUtils.filter_dir_name(
                topic.title)
            if not os.path.exists(save_path):
                os.makedirs(save_path)
            CommonUtils.write_topic(save_path + "/topic.txt", topic)
            topic['save_path'] = save_path
        # BUG FIX: `except Exception, e` is Python 2-only syntax, and the
        # original placed `continue` BEFORE the error_log calls, making
        # the logging unreachable — log first, then continue.
        except Exception as e:
            error_log("Create topic path fail,topic url:" + topic.Href)
            error_log(e)
            continue