Example No. 1
def download(illust_id, title, path, url, auth_api):
    # Build the save path (optionally embedding the filtered title) and
    # download the image through the authenticated client.
    extension = os.path.splitext(url)[1]
    if IMAGE_USE_ORG_NAME:
        save_path = path + "/p_%s_%s%s" % (
            illust_id, CommonUtils.filter_dir_name(title), extension)
    else:
        save_path = path + "/p_%s%s" % (illust_id, extension)
    print(save_path)
    auth_api.download(url, path=save_path)
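CommonUtils.filter_dir_name is used throughout these examples but is not shown. A minimal sketch of such a helper, assuming it only needs to strip characters that are invalid in file and directory names (the regex and the length cap below are assumptions, not the project's actual rules):

import re

def filter_dir_name(name, max_length=100):
    # Hypothetical sketch of CommonUtils.filter_dir_name: drop characters
    # that are not allowed in Windows/Unix file names and cap the length.
    cleaned = re.sub(r'[\\/:*?"<>|\r\n]', "", name).strip()
    return cleaned[:max_length]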
Example No. 2
def download_illustration(illu, path, auth_api):
    """
    #illu 包含插画详细
    path 存储路径
    auth_api 具有身份验证的下载工具
    """
    if "url" in illu and "title" in illu:
        illust_id = CommonUtils.get_url_param(illu.url, "illust_id")
        detail = PixivApi.illust_detail(illust_id)
        if detail:
            try:
                detail = detail.illust
                # Single-page illustration
                if detail.page_count == 1:
                    try:
                        url = detail.meta_single_page.original_image_url
                    except Exception:
                        url = detail.image_urls.large
                    download(illust_id, illu.title, path, url, auth_api)
                # Multi-page illustration
                else:
                    if detail.page_count > P_LIMIT:
                        # The page count exceeds P_LIMIT; give up on this download
                        print("Pixiv id: %s, name: %s, page count > limit, skipping download" % (illust_id, illu.title))
                        return
                    urls = detail.meta_pages
                    # Fetch the per-page URLs
                    if len(urls) > 1:
                        # Put all pages of this illustration into their own folder
                        path += "/p_%s" % illust_id
                        if not os.path.exists(path):
                            os.mkdir(path)
                        for index, page in enumerate(urls):
                            try:
                                url = page.image_urls.original if \
                                    "original" in page.image_urls else page.image_urls.large
                                extension = os.path.splitext(url)[1]
                                if IMAGE_USE_ORG_NAME:
                                    save_path = path + "/p_%s_%s_%d%s" % (
                                        illust_id,
                                        CommonUtils.filter_dir_name(illu.title),
                                        index, extension)
                                else:
                                    save_path = path + "/p_%s_%d%s" % (illust_id, index, extension)
                                print(save_path)
                                auth_api.download(url, path=save_path)
                            except Exception:
                                continue
                    else:
                        # Failed to get the per-page URLs; download the large image instead
                        url = detail.image_urls.large
                        download(illust_id, illu.title, path, url, auth_api)
            except Exception as e:
                error_log("Download fail:")
                error_log(e)
        else:
            print(illu.title + " can't get detail, illust_id: " + illust_id)
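A hedged usage sketch for download_illustration; the AttrDict wrapper, the URL, and the title below are illustrative stand-ins for the project's own search-result objects, and auth_api is assumed to be an authenticated client such as AuthPixivApi:

class AttrDict(dict):
    # Stand-in for the project's attribute-style dict: download_illustration
    # reads illu.url and illu.title as attributes and also checks them as keys.
    __getattr__ = dict.__getitem__

illu = AttrDict(url="https://www.pixiv.net/member_illust.php?mode=medium&illust_id=12345678",
                title="sample title")
# download_illustration(illu, "./downloads", auth_api)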
Example No. 3
 def handle_search(self):
     keywords = self.keywords.get().strip()
     if CommonUtils.is_empty(keywords):
         showwarning("warning", "Please enter search keywords!")
         print("warning", "Please enter search keywords!")
         return
     if CommonUtils.is_empty(self.path_var.get()):
         showwarning("warning", "path can't be empty!")
         print("warning", "path can't be empty!")
         return
     path = self.path_var.get().strip()
     if not os.path.exists(path):
         showerror("error", "No such file or directory!")
         print('error', 'No such file or directory')
         return
     path = path + "/" + CommonUtils.filter_dir_name("search_" + keywords)
     showinfo("info", "Searching...")
     search_handler = Thread(target=self.search, args=(keywords, path))
     search_handler.start()
Example No. 4
 def handle_related(self):
     id_var = CommonUtils.set_int(self.id_var.get().strip())
     if id_var <= 0:
         showwarning("warning", "Please enter a valid illustration id!")
         print("warning", "Please enter a valid illustration id!")
         return
     if CommonUtils.is_empty(self.path_var.get()):
         showwarning("warning", "path can't be empty!")
         print("warning", "path can't be empty!")
         return
     path = self.path_var.get().strip()
     if not os.path.exists(path):
         showerror("error", "No such file or directory!")
         print('error', 'No such file or directory')
         return
     path = path + "/" + CommonUtils.filter_dir_name("related_" +
                                                     str(id_var))
     showinfo("info", "Getting related illustrations of " + str(id_var) + ":")
     related_handler = Thread(target=self.related, args=(id_var, path))
     related_handler.start()
Example No. 5
 def get_pixivision_topics(cls, url, path):
     topic_list = HtmlDownloader.parse_illustration_topic(
         HtmlDownloader.download(url))
     if not topic_list:
         error_log(url + ": no illustration topics found")
         return
     for topic in topic_list:
         try:
             # Special characters must be filtered out, otherwise creating the folder fails.
             # Create the topic folder and write the topic information.
             save_path = path + "/" + CommonUtils.filter_dir_name(
                 topic.title)
             if not os.path.exists(save_path):
                 os.makedirs(save_path)
             CommonUtils.write_topic(save_path + "/topic.txt", topic)
             topic['save_path'] = save_path
         except Exception as e:
             error_log("Create topic path fail, topic url: " + topic.Href)
             error_log(e)
             continue
Example No. 6
 def download_topics(cls, url, path, quality=1):
     html = HtmlDownloader.download(url)
     illu_list = HtmlDownloader.parse_illustration(html)
     title_des = HtmlDownloader.get_title(html)
     if title_des and illu_list:
         title_des["size"] = len(illu_list)
         CommonUtils.write_topic_des(path + "/topic.txt", title_des)
     if not illu_list:
         return
     for illu in illu_list:
         try:
             filename = CommonUtils.filter_dir_name(illu.title)
             extension = os.path.splitext(illu.image)[1]
             illust_id = CommonUtils.get_url_param(illu.image_page, "illust_id")
             if quality == 1:
                 # Use the API to get the original image URL and download the original
                 detail = PixivApi.illust_detail(illust_id)
                 if detail:
                     download_url = ImageDownload.get_image_url(
                         illu, detail)
                     if IMAGE_USE_ORG_NAME:
                         save_path = path + "/p_%s_%s%s" % (
                             illust_id, filename, extension)
                     else:
                         save_path = path + "/p_%s%s" % (illust_id, extension)
                     print(save_path)
                     PixivApi.download(download_url, path=save_path)
                 else:
                     print(illu.title + " can't get detail, illust_id: " + illust_id)
             else:
                 # Download the pixivision preview image directly
                 print(path + "/p_%s_%s%s" % (illust_id, filename, extension))
                 PixivApi.download(illu.image,
                                   path=path + "/p_%s_%s%s" %
                                   (illust_id, filename, extension))
         except Exception as e:
             error_log("Download illustration failed, illustration: " + str(illu))
             error_log(e)
             continue
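ImageDownload.get_image_url is not shown here; judging from the fallback used in download_illustration (Example No. 2), a hedged sketch of what it likely resolves to:

def get_image_url(illu, detail):
    # Hypothetical sketch of ImageDownload.get_image_url, mirroring the
    # original-vs-large fallback seen in download_illustration; illu is kept
    # only to match the call site above.
    illust = detail.illust
    try:
        return illust.meta_single_page.original_image_url
    except AttributeError:
        return illust.image_urls.large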
Example No. 7
        page = SEARCH_PAGE
        keyword = SEARCH_KEYWORD
    else:
        username = raw_input("Please enter your pixiv account email or pixiv ID\n")
        password = raw_input("Enter password:\n")
        print("Loading")
        data_handler = PixivDataDownloader.PixivDataHandler(username, password)
        auth_api = AuthPixivApi(username, password)
        print("Login success!!!!")
        path = raw_input("Please input illustration save path:\n")
        page = int(raw_input("Please enter the total number of pages you want to crawl:\n"))
        download_threshold = int(raw_input("Please enter the minimum number of illustration's bookmarks:\n"))
        keyword = raw_input("Please enter search keyword:\n")
        keyword = keyword.decode("utf-8")
    queue = Queue()
    path = path + "/" + CommonUtils.filter_dir_name("search_" + keyword)
    # The default number of consumer download threads is 10; increase it based on download volume and machine performance
    thread_num = 10
    if not os.path.exists(path):
        os.makedirs(path)
    for i in range(thread_num):
        t = Thread(target=download_queue, name="Thread" + str(i), args=(queue, path, auth_api))
        t.daemon = True
        t.start()
    # The search results are small enough to deduplicate with an in-memory set; redisFilter is not needed
    set_filter = set()
    for p in range(1, page + 1):
        result = data_handler.search(keyword, page=p, download_threshold=download_threshold)
        print(result)

        for illu in result:
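download_queue, the consumer target started in the threads above, is not included in this snippet. A minimal consumer sketch, assuming the producer puts each illu from the search results onto the queue:

def download_queue(queue, path, auth_api):
    # Hypothetical consumer: pull illu items off the queue and hand them to
    # download_illustration (Example No. 2); the project's real download_queue
    # may differ.
    while True:
        illu = queue.get()
        try:
            download_illustration(illu, path, auth_api)
        finally:
            queue.task_done()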