def getMemberPage(self, member_id, page=1, bookmark=False, tags=None):
    """Fetch one page of a member's works (or bookmarks) via the ajax endpoints.

    Args:
        member_id: id of the member to query.
        page: 1-based page number; translated into an offset of 48 items per page.
        bookmark: when true, query the member's shown bookmarks instead of
            the member's own works.
        tags: optional tag filter; run through ``PixivHelper.encode_tags``
            before being placed in the URL.

    Returns:
        A tuple ``(artist, response)``: the ``PixivModelWhiteCube.PixivArtist``
        built from the response, and the raw response body that was parsed.
    """
    limit = 48
    offset = (page - 1) * limit
    encoded_tags = '' if tags is None else PixivHelper.encode_tags(tags)

    # Sample endpoints:
    #   https://www.pixiv.net/ajax/user/1039353/illusts/bookmarks?tag=&offset=0&limit=24&rest=show
    #   https://www.pixiv.net/ajax/user/5238/illustmanga/tag?tag=R-18&offset=0&limit=48
    #   https://www.pixiv.net/ajax/user/1813972/profile/all
    tagged_template = 'https://www.pixiv.net/ajax/user/{0}/illustmanga/tag?tag={1}&offset={2}&limit={3}'
    need_to_slice = False
    if bookmark:
        url = 'https://www.pixiv.net/ajax/user/{0}/illusts/bookmarks?tag={1}&offset={2}&limit={3}&rest=show'.format(member_id, encoded_tags, offset, limit)
    elif len(encoded_tags) > 0:
        url = tagged_template.format(member_id, encoded_tags, offset, limit)
    elif self._config.r18mode:
        url = tagged_template.format(member_id, 'R-18', offset, limit)
    else:
        # profile/all takes no offset/limit, so the page is cut out locally below.
        url = 'https://www.pixiv.net/ajax/user/{0}/profile/all'.format(member_id)
        need_to_slice = True

    PixivHelper.print_and_log('info', 'Member Url: ' + url)

    if url is None:
        return (None, None)

    # Serve from the cache when possible; otherwise fetch and remember the result.
    response = self._get_from_cache(url)
    if response is None:
        try:
            response = self.open_with_retry(url).read()
        except urllib2.HTTPError as ex:
            # On 404 the error body is kept as the response; any other HTTP
            # error leaves the response as None.
            if ex.code == 404:
                response = ex.read()
        self._put_to_cache(url, response)

    PixivHelper.GetLogger().debug(response)
    artist = PixivModelWhiteCube.PixivArtist(member_id, response, False, offset, limit)
    if len(artist.imageList) > 0:
        artist.reference_image_id = artist.imageList[0]
    else:
        artist.reference_image_id = 0
    self.getMemberInfoWhitecube(member_id, artist, bookmark)

    if artist.haveImages and need_to_slice:
        artist.imageList = artist.imageList[offset:offset + limit]

    return (artist, response)
def get_image_bookmark(caller, config, hide, start_page=1, end_page=0, tag=None, use_image_tag=False):
    """Collect the logged-in user's image bookmarks, one page at a time.

    Args:
        caller: object exposing the browser as ``caller.__br__``; the browser's
            ``_myId`` is used as the member id.
        config: passed to ``PixivHelper.wait`` between page requests.
        hide: when true the ``rest=hide`` bookmarks are requested, otherwise
            ``rest=show``.
        start_page: first page to request (1-based, 48 entries per page).
        end_page: last page to request; ``0`` means no page limit.
        tag: optional tag. By default it is encoded and sent as the ``tag``
            query parameter.
        use_image_tag: when true the request is sent with an empty ``tag``
            parameter and ``tag`` is instead handed to
            ``PixivBookmark.parseImageBookmark`` as ``image_tags_filter``.

    Returns:
        A tuple ``(total_list, total_bookmark_count)``: every bookmark entry
        returned by the parser across the visited pages, and the count reported
        by the parser for the last page that was parsed.
    """
    br = caller.__br__
    member_id = br._myId
    limit = 48
    total_list = []
    total_bookmark_count = 0

    # These do not change between pages, so compute them once up front.
    show = "hide" if hide else "show"
    encoded_tag = ''
    if tag is not None and len(tag) > 0:
        encoded_tag = PixivHelper.encode_tags(tag)
    # When filtering on the image's own tags, the request itself must not
    # filter on the user's bookmark tag.
    request_tag = '' if use_image_tag else encoded_tag

    i = start_page
    while True:
        if end_page != 0 and i > end_page:
            print("Page Limit reached: " + str(end_page))
            break

        # https://www.pixiv.net/ajax/user/189816/illusts/bookmarks?tag=&offset=0&limit=48&rest=show
        offset = limit * (i - 1)
        PixivHelper.print_and_log('info', f"Importing user's bookmarked image from page {i}")

        url = f"https://www.pixiv.net/ajax/user/{member_id}/illusts/bookmarks?tag={request_tag}&offset={offset}&limit={limit}&rest={show}"
        if use_image_tag:
            PixivHelper.print_and_log('info', f"Using image tag: {tag}")

        PixivHelper.print_and_log('info', f"Source URL: {url}")
        page = br.open(url)
        try:
            page_str = page.read().decode('utf8')
        finally:
            # Always release the response, even if reading or decoding fails.
            page.close()

        if use_image_tag:
            (bookmarks, total_bookmark_count) = PixivBookmark.parseImageBookmark(page_str, image_tags_filter=tag)
        else:
            (bookmarks, total_bookmark_count) = PixivBookmark.parseImageBookmark(page_str)

        total_list.extend(bookmarks)
        if len(bookmarks) == 0 and not use_image_tag:
            print("No more images.")
            break
        elif use_image_tag and total_bookmark_count / limit < i:
            # A filtered page may legitimately be empty, so the reported total
            # is used to detect the end instead of an empty page.
            # NOTE(review): when the total is an exact multiple of `limit` this
            # appears to request one extra page before stopping - confirm
            # whether that is intended.
            print("Last page reached.")
            break
        else:
            print(f" found {len(bookmarks)} images.")

        i = i + 1

        # Issue#569
        PixivHelper.wait(config=config)

    return (total_list, total_bookmark_count)
def process_tags(caller, config, tags, page=1, end_page=0, wild_card=True, title_caption=False, start_date=None, end_date=None, use_tags_as_dir=False, member_id=None, bookmark_count=None, sort_order='date_d', type_mode=None, notifier=None):
    """Search by tags and pass every image found to PixivImageHandler.process_image.

    Pages are requested one after another, starting at ``page``, until an empty
    or fully duplicated result page is returned, ``end_page`` is passed, the
    result reports its last page, the updated-limit is hit, or the user aborts
    after a keyboard interrupt.

    Args:
        caller: passed through to ``process_image``; when its
            ``DEBUG_SKIP_PROCESS_IMAGE`` flag is set the images are not processed.
        config: settings object; this function reads ``rootDirectory``,
            ``r18mode``, ``retry``, ``timeout``, ``useragent``,
            ``checkUpdatedLimit`` and ``enableInfiniteLoop`` from it.
        tags: search query. It is decoded for logging/directory naming and
            encoded for the requests.
        page: first page to request.
        end_page: last page to request; ``0`` means no limit.
        wild_card, title_caption, start_date, end_date, sort_order, type_mode:
            forwarded to ``getSearchTagPage``. ``end_date`` is overwritten when
            the infinite-loop rollover below triggers.
        use_tags_as_dir: when true, images are saved under a sub-directory of
            ``config.rootDirectory`` named after the sanitized search tags.
        member_id: forwarded to ``getSearchTagPage``; when given it is also
            shown in the title prefix.
        bookmark_count: when given, items whose ``bookmarkCount`` is lower are
            skipped; a positive value also enables ``use_bookmark_data``.
        notifier: forwarded to ``process_image``; defaults to
            ``PixivHelper.dummy_notifier``.

    Returns:
        None.

    Raises:
        KeyboardInterrupt: re-raised untouched.
        BaseException: any other error is logged, the last search page is
            dumped to an html file when available, and the error is re-raised.
    """
    if notifier is None:
        notifier = PixivHelper.dummy_notifier

    search_page = None
    _last_search_result = None
    i = page
    updated_limit_count = 0
    empty_page_retry = 0

    try:
        search_tags = PixivHelper.decode_tags(tags)

        root_dir = config.rootDirectory
        if use_tags_as_dir:
            PixivHelper.print_and_log(None, "Save to each directory using query tags.")
            root_dir = config.rootDirectory + os.sep + PixivHelper.sanitize_filename(search_tags)

        tags = PixivHelper.encode_tags(tags)

        images = 1
        last_image_id = -1
        skipped_count = 0
        use_bookmark_data = False
        if bookmark_count is not None and bookmark_count > 0:
            use_bookmark_data = True

        # Page size, used to turn the page range into an image range for the
        # "x of y" counter in the title prefix.
        offset = PixivTags.POSTS_PER_PAGE
        start_offset = (page - 1) * offset
        stop_offset = end_page * offset

        PixivHelper.print_and_log('info', f'Searching for: ({search_tags}) {tags} with partial match = {wild_card} and title/caption = {title_caption}')
        flag = True
        while flag:
            (t, search_page) = PixivBrowserFactory.getBrowser().getSearchTagPage(tags,
                                                                                 i,
                                                                                 wild_card=wild_card,
                                                                                 title_caption=title_caption,
                                                                                 start_date=start_date,
                                                                                 end_date=end_date,
                                                                                 member_id=member_id,
                                                                                 sort_order=sort_order,
                                                                                 start_page=page,
                                                                                 use_bookmark_data=use_bookmark_data,
                                                                                 bookmark_count=bookmark_count,
                                                                                 type_mode=type_mode,
                                                                                 r18mode=config.r18mode)
            PixivHelper.print_and_log("info", f'Found {len(t.itemList)} images for page {i}.')
            if len(t.itemList) == 0:
                # Issue #1090
                # An empty page is only treated as the end when the previous
                # result did not promise more images than have been paged through
                # (current page * PixivTags.POSTS_PER_PAGE). Otherwise wait and
                # retry the same page, up to config.retry times.
                if _last_search_result is not None and _last_search_result.availableImages > (PixivTags.POSTS_PER_PAGE * i) and empty_page_retry < config.retry:
                    PixivHelper.print_and_log("warn", f'Server did not return images, expected to have more (Total Post = {_last_search_result.availableImages}, current max posts = {PixivTags.POSTS_PER_PAGE * i}).')
                    # wait at least 2 minutes before retry
                    delay = config.timeout
                    if delay < 120:
                        delay = 120
                    PixivHelper.print_and_log(None, f"Waiting for {delay} seconds before retrying.")
                    PixivHelper.print_delay(delay)
                    empty_page_retry = empty_page_retry + 1
                    # Replace the browser headers with a user agent that has the
                    # current timestamp appended before requesting the page again.
                    PixivBrowserFactory.getBrowser().addheaders = [('User-agent', f'{config.useragent}{int(time.time())}')]
                    continue
                else:
                    PixivHelper.print_and_log("warn", 'No more images.')
                    flag = False
            elif _last_search_result is not None:
                # Stop when this page contains nothing that was not already on
                # the previous page.
                set1 = set((x.imageId) for x in _last_search_result.itemList)
                difference = [x for x in t.itemList if (x.imageId) not in set1]
                if len(difference) == 0:
                    PixivHelper.print_and_log("warn", 'Getting duplicated result set, no more new images.')
                    flag = False

            if flag:
                # Issue #1090: reset the retry counter on a successful load
                empty_page_retry = 0

                for item in t.itemList:
                    last_image_id = item.imageId
                    PixivHelper.print_and_log(None, f'Image #{images}')
                    PixivHelper.print_and_log(None, f'Image Id: {item.imageId}')

                    if bookmark_count is not None and bookmark_count > item.bookmarkCount:
                        PixivHelper.print_and_log(None, f'Bookmark Count: {item.bookmarkCount}')
                        PixivHelper.print_and_log('info', f'Skipping imageId= {item.imageId} because less than bookmark count limit ({bookmark_count} > {item.bookmarkCount}).')
                        skipped_count = skipped_count + 1
                        continue

                    result = 0
                    # Retry the same image for as long as BadStatusLine is raised;
                    # success or a keyboard interrupt leaves the loop.
                    while True:
                        try:
                            if t.availableImages > 0:
                                # PixivHelper.print_and_log(None, "Total Images: " + str(t.availableImages))
                                total_image = t.availableImages
                                if(stop_offset > 0 and stop_offset < total_image):
                                    total_image = stop_offset
                                total_image = total_image - start_offset
                                # PixivHelper.print_and_log(None, "Total Images Offset: " + str(total_image))
                            else:
                                # NOTE(review): the literal 20 does not match the
                                # POSTS_PER_PAGE used for the offsets above;
                                # confirm which page size is intended here.
                                total_image = ((i - 1) * 20) + len(t.itemList)
                            title_prefix = "Tags:{0} Page:{1} Image {2}+{3} of {4}".format(tags, i, images, skipped_count, total_image)
                            if member_id is not None:
                                title_prefix = "MemberId: {0} Tags:{1} Page:{2} Image {3}+{4} of {5}".format(member_id, tags, i, images, skipped_count, total_image)
                            result = PixivConstant.PIXIVUTIL_OK
                            if not caller.DEBUG_SKIP_PROCESS_IMAGE:
                                result = PixivImageHandler.process_image(caller,
                                                                         config,
                                                                         None,
                                                                         item.imageId,
                                                                         user_dir=root_dir,
                                                                         search_tags=search_tags,
                                                                         title_prefix=title_prefix,
                                                                         bookmark_count=item.bookmarkCount,
                                                                         image_response_count=item.imageResponse,
                                                                         notifier=notifier)
                                PixivHelper.wait(result, config)
                            break
                        except KeyboardInterrupt:
                            result = PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT
                            break
                        except http.client.BadStatusLine:
                            PixivHelper.print_and_log(None, "Stuff happened, trying again after 2 second...")
                            PixivHelper.print_delay(2)

                    images = images + 1
                    if result in (PixivConstant.PIXIVUTIL_SKIP_DUPLICATE,
                                  PixivConstant.PIXIVUTIL_SKIP_LOCAL_LARGER,
                                  PixivConstant.PIXIVUTIL_SKIP_DUPLICATE_NO_WAIT):
                        # Count skipped images; give up on this search once
                        # config.checkUpdatedLimit of them have been seen
                        # (0 disables the limit).
                        updated_limit_count = updated_limit_count + 1
                        if config.checkUpdatedLimit != 0 and updated_limit_count >= config.checkUpdatedLimit:
                            PixivHelper.print_and_log(None, f"Skipping tags: {tags}")
                            PixivBrowserFactory.getBrowser().clear_history()
                            return
                        gc.collect()
                        continue
                    elif result == PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT:
                        choice = input("Keyboard Interrupt detected, continue to next image (Y/N)").rstrip("\r")
                        if choice.upper() == 'N':
                            PixivHelper.print_and_log("info", f"Tags: {tags}, processing aborted.")
                            flag = False
                            break
                        else:
                            continue

            PixivBrowserFactory.getBrowser().clear_history()

            i = i + 1
            _last_search_result = t

            if end_page != 0 and end_page < i:
                PixivHelper.print_and_log('info', f"End Page reached: {end_page}")
                flag = False
            if t.isLastPage:
                PixivHelper.print_and_log('info', f"Last page: {i - 1}")
                flag = False
            # After page 1000, restart from page 1 with end_date set to the work
            # date of the last image seen, so the search continues with older works.
            if config.enableInfiniteLoop and i == 1001 and sort_order != 'date':
                if last_image_id > 0:
                    # get the last date
                    PixivHelper.print_and_log('info', f"Hit page 1000, trying to get workdate for last image id: {last_image_id}.")
                    # referer = 'https://www.pixiv.net/en/artworks/{0}'.format(last_image_id)
                    result = PixivBrowserFactory.getBrowser().getImagePage(last_image_id)
                    _last_date = result[0].worksDateDateTime
                    # _start_date = image.worksDateDateTime + datetime.timedelta(365)
                    # hit the last page
                    i = 1
                    end_date = _last_date.strftime("%Y-%m-%d")
                    PixivHelper.print_and_log('info', f"Hit page 1000, looping back to page 1 with ecd: {end_date}.")
                    flag = True
                    last_image_id = -1
                else:
                    PixivHelper.print_and_log('info', "No more image in the list.")
                    flag = False

        PixivHelper.print_and_log(None, 'done')
        if search_page is not None:
            del search_page
    except KeyboardInterrupt:
        raise
    except BaseException:
        PixivHelper.print_and_log('error', f'Error at process_tags() at page {i}: {sys.exc_info()}')
        # Best effort: keep the page that was being processed for debugging,
        # then re-raise the original error.
        try:
            if search_page is not None:
                dump_filename = f'Error page for search tags {tags} at page {i}.html'
                PixivHelper.dump_html(dump_filename, search_page)
                PixivHelper.print_and_log('error', f"Dumping html to: {dump_filename}")
        except BaseException:
            PixivHelper.print_and_log('error', f'Cannot dump page for search tags: {search_tags}')
        raise
def process_tags(caller, tags, page=1, end_page=0, wild_card=True, title_caption=False, start_date=None, end_date=None, use_tags_as_dir=False, member_id=None, bookmark_count=None, oldest_first=False, type_mode=None, notifier=None, job_option=None):
    """Search by tags and pass every image found to PixivImageHandler.process_image.

    Pages are requested one after another, starting at ``page``, until an empty
    or fully duplicated result page is returned, ``end_page`` is passed, the
    result reports its last page, the updated-limit is hit, or the user aborts
    after a keyboard interrupt.

    Args:
        caller: provides ``__config__`` and ``configfile`` (the config is
            reloaded on every call) and the ``DEBUG_SKIP_PROCESS_IMAGE`` flag;
            it is also passed through to ``process_image``.
        tags: search query. It is decoded for logging/directory naming and
            encoded for the requests.
        page: first page to request.
        end_page: last page to request; ``0`` means no limit.
        wild_card, title_caption, start_date, end_date, oldest_first, type_mode:
            forwarded to ``getSearchTagPage``. ``end_date`` is overwritten when
            the infinite-loop rollover below triggers.
        use_tags_as_dir: when true, images are saved under a sub-directory of
            the root directory named after the sanitized search tags.
        member_id: forwarded to ``getSearchTagPage``; when given it is also
            shown in the title prefix.
        bookmark_count: when given, items whose ``bookmarkCount`` is lower are
            skipped; a positive value also enables ``use_bookmark_data``.
        notifier: forwarded to ``process_image``; defaults to
            ``PixivHelper.dummy_notifier``.
        job_option: when given, ``rootDirectory`` and ``r18mode`` are read from
            it instead of the config, and it is forwarded to ``process_image``.

    Returns:
        None.

    Raises:
        KeyboardInterrupt: re-raised untouched.
        BaseException: any other error is logged, the last search page is
            dumped to an html file when available, and the error is re-raised.
    """
    # caller function/method
    # TODO: ideally to be removed or passed as argument
    config = caller.__config__
    config.loadConfig(path=caller.configfile)

    if notifier is None:
        notifier = PixivHelper.dummy_notifier

    # override the config source if job_option is given for filename formats
    format_src = config
    if job_option is not None:
        format_src = job_option

    search_page = None
    _last_search_result = None
    i = page
    updated_limit_count = 0

    try:
        search_tags = PixivHelper.decode_tags(tags)

        # Work on a local copy of the target directory. Appending to
        # format_src.rootDirectory would modify the shared config/job_option
        # object, so every later call would nest one more tag directory.
        root_dir = format_src.rootDirectory
        if use_tags_as_dir:
            PixivHelper.print_and_log(None, "Save to each directory using query tags.")
            root_dir = format_src.rootDirectory + os.sep + PixivHelper.sanitize_filename(search_tags)

        tags = PixivHelper.encode_tags(tags)

        images = 1
        last_image_id = -1
        skipped_count = 0
        use_bookmark_data = False
        if bookmark_count is not None and bookmark_count > 0:
            use_bookmark_data = True

        # Page size, used to turn the page range into an image range for the
        # "x of y" counter in the title prefix.
        offset = 60
        start_offset = (page - 1) * offset
        stop_offset = end_page * offset

        PixivHelper.print_and_log('info', f'Searching for: ({search_tags}) {tags}')
        flag = True
        while flag:
            (t, search_page) = PixivBrowserFactory.getBrowser().getSearchTagPage(
                tags,
                i,
                wild_card,
                title_caption,
                start_date,
                end_date,
                member_id,
                oldest_first,
                page,
                use_bookmark_data,
                bookmark_count,
                type_mode,
                r18mode=format_src.r18mode)
            if len(t.itemList) == 0:
                PixivHelper.print_and_log(None, 'No more images')
                flag = False
            elif _last_search_result is not None:
                # Stop when this page contains nothing that was not already on
                # the previous page.
                set1 = set((x.imageId) for x in _last_search_result.itemList)
                difference = [x for x in t.itemList if (x.imageId) not in set1]
                if len(difference) == 0:
                    PixivHelper.print_and_log(None, 'Getting duplicated result set, no more new images.')
                    flag = False

            if flag:
                for item in t.itemList:
                    last_image_id = item.imageId
                    PixivHelper.print_and_log(None, f'Image #{images}')
                    PixivHelper.print_and_log(None, f'Image Id: {item.imageId}')

                    if bookmark_count is not None and bookmark_count > item.bookmarkCount:
                        PixivHelper.print_and_log(None, f'Bookmark Count: {item.bookmarkCount}')
                        PixivHelper.print_and_log(
                            'info',
                            f'Skipping imageId= {item.imageId} because less than bookmark count limit ({bookmark_count} > {item.bookmarkCount}).'
                        )
                        skipped_count = skipped_count + 1
                        continue

                    result = 0
                    # Retry the same image for as long as BadStatusLine is raised;
                    # success or a keyboard interrupt leaves the loop.
                    while True:
                        try:
                            if t.availableImages > 0:
                                total_image = t.availableImages
                                if (stop_offset > 0 and stop_offset < total_image):
                                    total_image = stop_offset
                                total_image = total_image - start_offset
                            else:
                                # NOTE(review): the literal 20 does not match the
                                # page size of 60 used for the offsets above;
                                # confirm which page size is intended here.
                                total_image = ((i - 1) * 20) + len(t.itemList)
                            title_prefix = "Tags:{0} Page:{1} Image {2}+{3} of {4}".format(
                                tags, i, images, skipped_count, total_image)
                            if member_id is not None:
                                title_prefix = "MemberId: {0} Tags:{1} Page:{2} Image {3}+{4} of {5}".format(
                                    member_id, tags, i, images, skipped_count, total_image)
                            result = PixivConstant.PIXIVUTIL_OK
                            if not caller.DEBUG_SKIP_PROCESS_IMAGE:
                                result = PixivImageHandler.process_image(
                                    caller,
                                    None,
                                    item.imageId,
                                    user_dir=root_dir,
                                    search_tags=search_tags,
                                    title_prefix=title_prefix,
                                    bookmark_count=item.bookmarkCount,
                                    image_response_count=item.imageResponse,
                                    notifier=notifier,
                                    job_option=job_option)
                                PixivHelper.wait(result, config)
                            break
                        except KeyboardInterrupt:
                            result = PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT
                            break
                        except http.client.BadStatusLine:
                            PixivHelper.print_and_log(None, "Stuff happened, trying again after 2 second...")
                            time.sleep(2)

                    images = images + 1
                    if result in (PixivConstant.PIXIVUTIL_SKIP_DUPLICATE,
                                  PixivConstant.PIXIVUTIL_SKIP_LOCAL_LARGER,
                                  PixivConstant.PIXIVUTIL_SKIP_DUPLICATE_NO_WAIT):
                        # Count skipped images; give up on this search once
                        # config.checkUpdatedLimit of them have been seen
                        # (0 disables the limit). `>=` stops at exactly the
                        # configured number instead of one image later.
                        updated_limit_count = updated_limit_count + 1
                        if config.checkUpdatedLimit != 0 and updated_limit_count >= config.checkUpdatedLimit:
                            PixivHelper.print_and_log(None, f"Skipping tags: {tags}")
                            PixivBrowserFactory.getBrowser().clear_history()
                            return
                        gc.collect()
                        continue
                    elif result == PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT:
                        choice = input("Keyboard Interrupt detected, continue to next image (Y/N)").rstrip("\r")
                        if choice.upper() == 'N':
                            PixivHelper.print_and_log("info", f"Tags: {tags}, processing aborted.")
                            flag = False
                            break
                        else:
                            continue

            PixivBrowserFactory.getBrowser().clear_history()

            i = i + 1
            _last_search_result = t

            if end_page != 0 and end_page < i:
                PixivHelper.print_and_log('info', f"End Page reached: {end_page}")
                flag = False
            if t.isLastPage:
                PixivHelper.print_and_log('info', f"Last page: {i - 1}")
                flag = False
            # After page 1000, restart from page 1 with end_date set to the work
            # date of the last image seen, so the search continues with older works.
            if config.enableInfiniteLoop and i == 1001 and not oldest_first:
                if last_image_id > 0:
                    # get the last date
                    PixivHelper.print_and_log(
                        'info',
                        f"Hit page 1000, trying to get workdate for last image id: {last_image_id}."
                    )
                    result = PixivBrowserFactory.getBrowser().getImagePage(last_image_id)
                    _last_date = result[0].worksDateDateTime
                    # hit the last page
                    i = 1
                    end_date = _last_date.strftime("%Y-%m-%d")
                    PixivHelper.print_and_log(
                        'info',
                        f"Hit page 1000, looping back to page 1 with ecd: {end_date}."
                    )
                    flag = True
                    last_image_id = -1
                else:
                    PixivHelper.print_and_log('info', "No more image in the list.")
                    flag = False

        PixivHelper.print_and_log(None, 'done')
        if search_page is not None:
            del search_page
    except KeyboardInterrupt:
        raise
    except BaseException:
        PixivHelper.print_and_log('error', f'Error at process_tags() at page {i}: {sys.exc_info()}')
        # Best effort: keep the page that was being processed for debugging,
        # then re-raise the original error.
        try:
            if search_page is not None:
                dump_filename = f'Error page for search tags {tags} at page {i}.html'
                PixivHelper.dump_html(dump_filename, search_page)
                PixivHelper.print_and_log('error', f"Dumping html to: {dump_filename}")
        except BaseException:
            PixivHelper.print_and_log('error', f'Cannot dump page for search tags: {search_tags}')
        raise
def getMemberPage(self, member_id, page=1, bookmark=False, tags=None):
    """Fetch one page of a member's works (or bookmarks) via the ajax endpoints.

    Args:
        member_id: id of the member to query.
        page: 1-based page number; translated into an offset of 24 items per page.
        bookmark: when true, query the member's shown bookmarks instead of
            the member's own works.
        tags: optional tag filter; run through ``PixivHelper.encode_tags``
            before being placed in the URL.

    Returns:
        A tuple ``(artist, response)``: the ``PixivModelWhiteCube.PixivArtist``
        built from the response, and the raw response body that was parsed.
    """
    limit = 24
    offset = (page - 1) * limit
    encoded_tags = '' if tags is None else PixivHelper.encode_tags(tags)

    # Sample endpoints:
    #   https://www.pixiv.net/ajax/user/1039353/illusts/bookmarks?tag=&offset=0&limit=24&rest=show
    #   https://www.pixiv.net/ajax/user/1113943/illustmanga/tag/%E6%A5%B5%E4%B8%8A%E3%81%AE%E4%B9%B3?offset=0&limit=24
    #   https://www.pixiv.net/ajax/user/1813972/profile/all
    tagged_template = 'https://www.pixiv.net/ajax/user/{0}/illustmanga/tag/{1}?offset={2}&limit={3}'
    need_to_slice = False
    if bookmark:
        url = 'https://www.pixiv.net/ajax/user/{0}/illusts/bookmarks?tag={1}&offset={2}&limit={3}&rest=show'.format(member_id, encoded_tags, offset, limit)
    elif len(encoded_tags) > 0:
        url = tagged_template.format(member_id, encoded_tags, offset, limit)
    elif self._config.r18mode:
        url = tagged_template.format(member_id, 'R-18', offset, limit)
    else:
        # profile/all takes no offset/limit, so the page is cut out locally below.
        url = 'https://www.pixiv.net/ajax/user/{0}/profile/all'.format(member_id)
        need_to_slice = True

    PixivHelper.print_and_log('info', 'Member Url: ' + url)

    if url is None:
        return (None, None)

    # Serve from the cache when possible; otherwise fetch and remember the result.
    response = self.get_from_cache(url)
    if response is None:
        response = self.open(url).read()
        self.put_to_cache(url, response)

    PixivHelper.GetLogger().debug(response)
    artist = PixivModelWhiteCube.PixivArtist(member_id, response, False, offset, limit)
    self.getMemberInfoWhitecube(member_id, artist, bookmark)

    if artist.haveImages and need_to_slice:
        artist.imageList = artist.imageList[offset:offset + limit]

    return (artist, response)
def getMemberPage(self, member_id, page=1, bookmark=False, tags=None):
    """Load a single page of a member's works or bookmarks from the ajax API.

    Args:
        member_id: id of the member whose page is requested.
        page: 1-based page index; each page covers 24 entries.
        bookmark: true to read the member's shown bookmarks, false to read
            the member's own works.
        tags: optional tag filter, encoded with ``PixivHelper.encode_tags``.

    Returns:
        ``(artist, response)`` where ``artist`` is the
        ``PixivModelWhiteCube.PixivArtist`` parsed from ``response`` and
        ``response`` is the raw body that was fetched or read from the cache.
    """
    tag_filter = PixivHelper.encode_tags(tags) if tags is not None else ''
    limit = 24
    offset = (page - 1) * limit

    # Pick the endpoint. Only profile/all ignores offset/limit, which is why
    # its image list has to be sliced after parsing.
    need_to_slice = False
    if bookmark:
        # https://www.pixiv.net/ajax/user/1039353/illusts/bookmarks?tag=&offset=0&limit=24&rest=show
        url = 'https://www.pixiv.net/ajax/user/{0}/illusts/bookmarks?tag={1}&offset={2}&limit={3}&rest=show'.format(member_id, tag_filter, offset, limit)
    else:
        # https://www.pixiv.net/ajax/user/1813972/illusts/tag/Fate%2FGrandOrder?offset=0&limit=24
        # https://www.pixiv.net/ajax/user/1813972/profile/all
        by_tag = 'https://www.pixiv.net/ajax/user/{0}/illustmanga/tag/{1}?offset={2}&limit={3}'
        if len(tag_filter) > 0:
            url = by_tag.format(member_id, tag_filter, offset, limit)
        elif self._config.r18mode:
            url = by_tag.format(member_id, 'R-18', offset, limit)
        else:
            url = 'https://www.pixiv.net/ajax/user/{0}/profile/all'.format(member_id)
            need_to_slice = True

    PixivHelper.print_and_log('info', 'Member Url: ' + url)

    artist = None
    response = None
    if url is not None:
        cached = self.get_from_cache(url)
        if cached is None:
            # Cache miss: download the page and store it for later calls.
            cached = self.open(url).read()
            self.put_to_cache(url, cached)
        response = cached

        PixivHelper.GetLogger().debug(response)
        artist = PixivModelWhiteCube.PixivArtist(member_id, response, False, offset, limit)
        self.getMemberInfoWhitecube(member_id, artist, bookmark)

        if artist.haveImages and need_to_slice:
            page_end = offset + limit
            artist.imageList = artist.imageList[offset:page_end]

    return (artist, response)