def fix_url(url, session=None, cw=None):
    print_ = get_print(cw)
    if '&manga_name=' not in url:
        return url
    print_('fix url')
    qs = query_url(url)
    name = qs['manga_name'][0].replace('+', ' ')
    url_search = urljoin(url, '/bbs/search.php')
    url_search = update_url_query(url_search, {'stx': [name]})
    print(url_search)
    html = read_html(url_search, session=session)
    soup = Soup(html)
    posts = soup.findAll('div', class_='post-row')
    print_('posts: {}'.format(len(posts)))
    if len(posts) != 1:
        return url
    for a in posts[0].findAll('a'):
        href = urljoin(url, a.attrs['href'])
        if 'manga_detail' in href:
            break
    else:
        raise Exception('Failed to find link')
    if cw is not None:
        cw.gal_num = href
    return href

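# Every snippet in this file leans on a query_url() helper whose definition is
# not shown here. A minimal sketch of what the call sites imply, assuming it
# simply wraps urllib.parse and returns a {key: [values]} mapping the way
# parse_qs does (the explicit '+'-to-space handling in some callers suggests
# the real helper may skip that decoding step, so treat this as approximate):
from urllib.parse import urlparse, parse_qs

def query_url_sketch(url):
    # parse_qs keeps every value in a list, which is why callers index [0] or [-1]
    return parse_qs(urlparse(url).query)

# query_url_sketch('https://x.example/p?tags=blue_sky&tags=cloud')
# -> {'tags': ['blue_sky', 'cloud']}
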
def fix_url(cls, url):  # 2033
    if not re.match('https?://.+', url, re.IGNORECASE):
        url = 'https://www.youtube.com/watch?v={}'.format(url)
    qs = query_url(url)
    if 'v' in qs:
        url = url.split('?')[0] + '?v={}'.format(qs['v'][0])
    return url

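# Illustrative effect of the fix_url() above (IDs are made up): both a bare
# video ID and a watch URL carrying extra parameters normalize to the same form.
#
#   fix_url(cls, 'abc123')
#   fix_url(cls, 'https://www.youtube.com/watch?v=abc123&list=xyz&t=42')
#   both -> 'https://www.youtube.com/watch?v=abc123'
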
def get_types(self):
    return set()  # legacy; #2653
    types = set()
    for t in query_url(self.url).get('type', []):
        t = t.lower()
        types.add(t)
    return types

def read(self):
    qs = query_url(self.url)
    for key in qs:
        if key.lower() in ('file', 'filename'):
            name = qs[key][-1]
            break
    else:
        name = os.path.basename(self.url)
        for esc in ['?', '#']:
            name = name.split(esc)[0]
    ext = get_ext(name)
    if not ext:
        try:
            ext = downloader.get_ext(self.url)
        except:
            ext = ''
    name = os.path.splitext(name)[0]
    self.urls.append(self.url)
    id_ = md5(self.url.encode('utf8')).hexdigest()[:8]
    tail = ' ({}){}'.format(id_, ext)
    filename = clean_title(name, n=-len(tail)) + tail
    self.filenames[self.url] = filename
    self.title = filename

def read(self):
    if '/post/' in self.url:
        raise errors.Invalid(
            tr_('개별 다운로드는 지원하지 않습니다: {}').format(self.url))  # tr_: 'Individual downloads are not supported: {}'
    self._popular = 'search-Popular.' in self.url
    self.title = clean_title(self.name)
    qs = query_url(self.url)
    q = qs['q'][0]
    for id in get_ids_multi(q, self._popular, self.cw):
        img = Image(id, self.url)
        self.urls.append(img.url)

def id(self):
    if self.type_sankaku == 'www':
        id = u'[www] ' + self.soup.find('h1', class_='entry-title').text.strip()
    else:
        qs = query_url(self.url)
        tags = qs.get('tags', [])
        tags.sort()
        id = u' '.join(tags)
        if not id:
            id = u'N/A'
        id = '[{}] '.format(self.type_sankaku) + id
    return clean_title(id)

def get_tags(url):
    url = clean_url(url)
    qs = query_url(url)
    if 'page=favorites' in url:
        id = qs.get('id', ['N/A'])[0]
        id = u'fav_{}'.format(id)
    else:
        tags = qs.get('tags', [])
        tags.sort()
        id = u' '.join(tags)
        if not id:
            id = u'N/A'
    return id

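# Hypothetical inputs/outputs for get_tags(), assuming the parse_qs-like
# query_url sketched near the top (URLs are made up):
#
#   get_tags('https://booru.example/index.php?page=favorites&s=view&id=99')
#   -> 'fav_99'
#   get_tags('https://booru.example/index.php?page=post&s=list&tags=blue_sky+cloud')
#   -> 'blue_sky cloud'
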
def get_page(url):
    qs = query_url(url)
    page = qs.get('p')
    if page:
        page = int(page[0])
    else:
        page = re.findall('_p([0-9]+)', url)
        if page:
            page = int(page[0])
        else:
            page = None
    if page == 1:
        page = None
    return page

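# A few illustrative calls to get_page() (URLs are made up; these hold under
# the parse_qs-like query_url assumption sketched earlier):
assert get_page('https://example.com/search?p=3') == 3        # from the query string
assert get_page('https://example.com/img/123_p2.png') == 2    # from the _pN suffix
assert get_page('https://example.com/search?p=1') is None     # page 1 normalizes to None
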
def get_id(url):
    if '/dashboard/blog/' in url:
        url = re.find('/dashboard/blog/([0-9a-zA-Z_-]+)', url)
    if '/login_required/' in url:
        url = url.split('/login_required/')[1].split('?')[0].split('/')[0]
    if 'tumblr.com/blog/view/' in url:
        url = url.split('tumblr.com/blog/view/')[1]
    if 'tumblr.com' in url:
        if 'www.tumblr.com' in url:
            qs = query_url(url)
            url = qs.get('url', [url])[0]
        url = url.split('.tumblr.com')[0].split('/')[-1]
    if url == 'www':
        raise Exception('no id')
    return url

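# Hypothetical examples of what get_id() resolves (blog names are made up):
#
#   get_id('https://staff.tumblr.com/post/123')             -> 'staff'
#   get_id('https://www.tumblr.com/blog/view/staff')        -> 'staff'
#   get_id('https://www.tumblr.com/login_required/staff?x') -> 'staff'
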
def fix_url(cls, url):
    if 'pornhub_gif_' in url:
        url = 'https://www.pornhub.com/gif/{}'.format(
            url.replace('pornhub_gif_', ''))
    elif 'pornhub_album_' in url:
        url = 'https://www.pornhub.com/album/{}'.format(
            url.replace('pornhub_album_', ''))
    elif 'pornhub_' in url:
        url = 'https://www.pornhub.com/view_video.php?viewkey={}'\
            .format(url.replace('pornhub_', ''))
    if '/authenticate/goToLoggedIn' in url:
        qs = utils.query_url(url)
        url = urljoin(url, qs['url'][0])
    url = url.replace('pornhubthbh7ap3u.onion', 'pornhub.com')
    return url

def id(self):
    if self.type_sankaku == 'www':
        id = '[www] ' + self.soup.find('h1', class_='entry-title').text.strip()
    else:
        if '/post/show/' in self.url:
            id = get_id(self.url)
        else:
            qs = query_url(self.url)
            tags = qs.get('tags', [])
            tags.sort()
            id = ' '.join(tags)
        if not id:
            id = 'N/A'
        id = '[{}] {}'.format(self.type_sankaku, id)
    return clean_title(id)

def read(self):
    cw = self.cw
    title = self.url
    if self.url.startswith('magnet:'):
        qs = utils.query_url(self.url)
        if 'dn' in qs:
            self._dn = qs['dn'][0]
    info = getattr(cw, 'info?', None)
    if info is not None:
        self.print_('cached info')
        self._info = info
    if self._info is None:
        try:
            self._info = torrent.get_info(self.url, cw, timeout=TIMEOUT, callback=self.callback)
            if CACHE_INFO:
                setattr(cw, 'info?', self._info)
        except Exception as e:
            self.update_pause()
            if not cw.paused:
                raise errors.Invalid('Failed to read metadata: {}'.format(self.url), fail=True)
    if self._info is None:
        cw.paused = True
    if cw.paused:
        return
    hash_ = self._info.hash.hex()
    self.print_('v2: {}'.format(self._info.v2))
    self.print_('Hash: {}'.format(hash_))
    if not self._info.v2:
        self.url = 'magnet:?xt=urn:btih:{}'.format(hash_)
    date = datetime.fromtimestamp(self._info.creation_date())
    date = date.strftime('%y-%m-%d %H:%M:%S')
    self.print_('Created on: {}'.format(date))
    self.print_('Total size: {}'.format(fs.size(self._info.total_size())))
    self.print_('Pieces: {} x {}'.format(self._info.num_pieces(), fs.size(self._info.piece_length())))
    self.print_('Creator: {}'.format(self._info.creator()))
    self.print_('Comment: {}'.format(self._info.comment()))
    cw.setTotalFileSize(self._info.total_size())
    cw.imgs.clear()
    cw.dones.clear()
    self.urls = [self.url]
    self.title = self.name
    self.update_files()
    cw.pbar.show()

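# Note on the caching above: 'info?' is not a valid Python identifier, so the
# parsed torrent metadata can only be attached to and read from cw via
# setattr()/getattr(), never through ordinary attribute syntax.
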
def read(self):
    qs = query_url(self.url)
    for key in qs:
        if key.lower() in ('file', 'filename'):
            name = qs[key][-1]
            break
    else:
        name = os.path.basename(self.url)
        for esc in ['?', '#']:
            name = name.split(esc)[0]
    if not get_ext(name):
        name += downloader.get_ext(self.url)
    self.urls.append(self.url)
    self.filenames[self.url] = clean_title(name)
    self.title = name

def get_dn(cls, url):
    if url.startswith('magnet:'):
        qs = utils.query_url(url)
        if 'dn' in qs:
            return utils.html_unescape(qs['dn'][0])

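# Usage sketch for get_dn() (magnet URI is illustrative; assumes utils.query_url
# behaves like parse_qs on magnet URIs and utils.html_unescape like html.unescape):
#
#   get_dn(cls, 'magnet:?xt=urn:btih:aabbcc&dn=My+File')  -> 'My File'
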
def get_info(url, cw=None, depth=0):
    print_ = get_print(cw)
    api = PixivAPI()
    info = {}
    imgs = []

    if utils.ui_setting:
        ugoira_ext = [None, '.gif', '.webp', '.png'][utils.ui_setting.ugoira_convert.currentIndex()]
    else:
        ugoira_ext = None
    if utils.ui_setting:
        format_ = compatstr(utils.ui_setting.pixivFormat.currentText())
    else:
        format_ = 'id_ppage'

    max_pid = get_max_range(cw)

    if api.illust_id(url):  # Single post
        id_ = api.illust_id(url)
        data = api.illust(id_)
        login = '******' not in data
        if FORCE_LOGIN and not login: #
            raise errors.LoginRequired()
        if data['xRestrict'] and not login:
            raise errors.LoginRequired('R-18')
        info['artist'] = data['userName']
        info['artist_id'] = data['userId']
        info['raw_title'] = data['illustTitle']
        info['title'] = '{} (pixiv_illust_{})'.format(info['raw_title'], id_)
        info['create_date'] = parse_time(data['createDate'])
        tags_illust = set(tag['tag'] for tag in data['tags']['tags'])
        if tags_matched(tags_illust, cw):
            if data['illustType'] == 2:  # ugoira
                data = api.ugoira_meta(id_)
                ugoira = {
                    'ext': ugoira_ext,
                    'delay': [frame['delay'] for frame in data['frames']],
                }
                img = Image(data['originalSrc'], url, id_, 0, format_, info, cw, ugoira=ugoira)
                imgs.append(img)
            else:
                data = api.pages(id_)
                for img in data:
                    img = Image(img['urls']['original'], url, id_, len(imgs), format_, info, cw)
                    imgs.append(img)
        else:
            print('tags mismatched')
    elif '/bookmarks/' in url or 'bookmark.php' in url:  # User bookmarks
        id_ = api.user_id(url)
        if id_ is None: #
            id_ = my_id()
        process_user(id_, info, api)
        info['title'] = '{} (pixiv_bmk_{})'.format(info['artist'], info['artist_id'])
        ids = []
        ids_set = set()
        offset = 0
        while len(ids) < max_pid:
            data = api.bookmarks(id_, offset)
            c = 0
            for id in [work['id'] for work in data['works']]:
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            offset += LIMIT
            if depth == 0:
                check_alive(cw)
        process_ids(ids[:max_pid], info, imgs, cw, depth)
    elif '/tags/' in url or 'search.php' in url:  # Search
        q = unquote(
            re.find(r'/tags/([^/]+)', url)
            or re.find('[?&]word=([^&]*)', url, err='no tags'))
        info['title'] = '{} (pixiv_search_{})'.format(q, q.replace(' ', '+'))
        qs = query_url(url)
        order = qs.get('order', ['date_d'])[0]
        mode = qs.get('mode', ['all'])[0]
        ids = []
        ids_set = set()
        p = 1
        while len(ids) < max_pid:
            data = api.search(q, order, mode, p=p)
            c = 0
            for id in [
                    illust['id'] for illust in data['illustManga']['data']
                    if 'id' in illust
            ]:
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            p += 1
        process_ids(ids[:max_pid], info, imgs, cw, depth)
    elif 'bookmark_new_illust.php' in url or 'bookmark_new_illust_r18.php' in url:  # Newest works: Following
        r18 = 'bookmark_new_illust_r18.php' in url
        id_ = my_id()
        process_user(id_, info, api)
        info['title'] = '{} (pixiv_following_{}{})'.format(
            info['artist'], 'r18_' if r18 else '', info['artist_id'])
        ids = []
        ids_set = set()
        p = 1
        while len(ids) < max_pid:
            c = 0
            for id in api.following(p, r18=r18):
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            p += 1
        process_ids(ids[:max_pid], info, imgs, cw, depth)
    elif api.user_id(url):  # User illusts
        id_ = api.user_id(url)
        process_user(id_, info, api)
        data = api.profile(id_)
        info['title'] = '{} (pixiv_{})'.format(info['artist'], info['artist_id'])
        ids = []
        for illusts in [data['illusts'], data['manga']]:
            if not illusts:
                continue
            ids += list(illusts.keys())
        ids = sorted(ids, key=int, reverse=True)
        process_ids(ids[:max_pid], info, imgs, cw, depth)
    else:
        raise NotImplementedError()

    info['imgs'] = imgs[:max_pid]
    return info

def read(self):
    type = self.pixiv_type
    cw = self.customWidget
    print_ = cw.print_
    ui_setting = self.ui_setting

    if type == 'following':
        raise NotImplementedError('following')

    self._format = [None, 'gif', 'webp', 'png'][ui_setting.ugoira_convert.currentIndex()]
    self._format_name = compatstr(ui_setting.pixivFormat.currentText())
    types = [t.lower() for t in query_url(self.url).get('type', [])]
    if types:
        s = u', '.join(sorted(types))
        types = set(types)
    else:
        s = 'all'
        types = None
    print_(u'Type: {}'.format(s))
    print_(u'info: {}'.format(self.info))
    api = self.api
    query = self.id.replace('_bmk', '').replace('_illust', '').replace('pixiv_', '').replace('search_', '')
    if type != 'search':
        query = int(query)
    print('pixiv_query:', query)
    try:
        if type in ('user', 'bookmark', 'search'):
            max_pid = get_max_range(cw, 2000)
            if ui_setting.groupBox_tag.isChecked():
                tags = [
                    compatstr(ui_setting.tagList.item(i).text())
                    for i in range(ui_setting.tagList.count())
                ]
            else:
                tags = []
            if type == 'search':
                query = query.replace('+', ' ')
                name = query
            else:
                id = self.id.replace('_bmk', '').replace('pixiv_', '').replace('search_', '')
                print('name', id)
                name = get_name(id, self.api, cw=cw)
                cw.artist = name
            title = u'{} ({})'.format(name, self.id)
            print_(title)
            dir = os.path.join(get_outdir('pixiv'), clean_title(title))
            imgs = get_imgs(query, type=type, api=api, n=max_pid, tags=tags,
                            types=types, format=self._format,
                            format_name=self._format_name, dir=dir,
                            cw=cw, title=title, info=self.info)
        elif type == 'illust':
            for try_ in range(N_TRY):
                try:
                    detail = api.illust_detail(query, req_auth=True)
                    error = detail.get('error')
                    if error:
                        raise PixivError(error)
                    break
                except PixivError as e:
                    api = e.api
                    print_(e)
                    if try_ < N_TRY - 1:
                        print_('retry...')
                        sleep(SLEEP)
                    else:
                        raise
            illust = detail.illust
            name = illust.title
            title = u'{} ({})'.format(name, self.id)
            dir = os.path.join(get_outdir('pixiv'), clean_title(title))
            imgs = get_imgs_from_illust(illust, api=api, format=self._format,
                                        dir=dir, cw=cw,
                                        format_name=self._format_name)
    except PixivError as e:
        msg = u'PixivError: {}'.format(e.message)
        return self.Invalid(msg)

    self.imgs = imgs
    for img in imgs:
        self.urls.append(img.url)
        self.filenames[img.url] = img.filename
    self.title = clean_title(title)  # 1390

def route_search():
    # Get query
    text_query = flask.request.args.get('q', None)
    page_num = flask.request.args.get('page', 1)
    show_syntax = flask.request.args.get('show_syntax', 0)
    has_more_items = False
    expires = 0

    # Fail if bad parameters provided
    try:
        page_num = int(page_num)
        show_syntax = bool(int(show_syntax))
    except ValueError:
        flask.abort(400)

    # Search page
    if text_query is not None:
        query = Search.query(text_query, page_num,
                             offset=(page_num - 1) * app.config['ITEMS_PER_PAGE'],
                             count=app.config['ITEMS_PER_PAGE'] + 1)
        items = query.all()
        if len(items) == app.config['ITEMS_PER_PAGE'] + 1:
            items = items[:-1]
            has_more_items = True
        query_title = text_query if len(text_query) > 0 else 'HN Firehose'
        meta_og_title = u'hnapp search: "%s"' % query_title
        title = u'%s – hnapp' % query_title

    # Front page
    else:
        query = None
        items = None
        title = u'hnapp – Hacker News Search With RSS & JSON Feeds'
        meta_og_title = u'hnapp – Hacker News RSS'

    # Meta SEO tags
    meta_keywords = u'Hacker News,RSS,hnapp'
    meta_description = u'Get only the stories and comments you want. Follow users, keywords, jobs, mentions of your product, etc.'

    # Get format
    if flask.request.path == '/':
        output_format = None
        html_template = 'search.html'
    else:
        if text_query is None:
            flask.abort(400)
        output_format = flask.request.path[1:]
        if output_format == 'bare':
            html_template = 'parts/items.html'

    # Those users who created their filters on alpha version new.hnapp.com
    # don't expect comments, so we fix it for them
    if flask.request.args.get('legacy', 0) == '1':
        # Not a search – redirect to home page
        if text_query is None:
            return flask.redirect(query_url(None, output_format=None), code=302)
        else:
            return flask.redirect(query_url('type:story ' + text_query,
                                            output_format=output_format), code=302)

    # Web page or bare HTML
    if output_format in (None, 'bare'):
        page_data = {
            'is_app': True,
            'title': title,
            'meta_og_title': meta_og_title,
            'meta_keywords': meta_keywords,
            'meta_description': meta_description,
            'show_syntax': show_syntax,
            'query': text_query,
            'items': items,
            'has_more_items': int(has_more_items),
            'page_expires_at': int(time.time()) + 60 * 5,  # Cache pages for this many seconds
            'this_url': flask.request.url,
            'prev_url': query_url(text_query, page_num=page_num - 1) if query is not None else None,
            'next_url': query_url(text_query, page_num=page_num + 1) if query is not None else None,
            'rss_url': query_url(text_query, output_format='rss') if query is not None else None,
            'json_url': query_url(text_query, output_format='json') if query is not None else None,
            'page_num': page_num,
            'ga_id': app.config['GA_ID'],
            'HOST_NAME': app.config['HOST_NAME']
        }
        return flask.render_template(html_template, **page_data)

    # RSS
    elif output_format == 'rss':
        feed = AtomFeed(title=title.encode('ascii', 'xmlcharrefreplace'),
                        title_type='html',
                        feed_url=query_url(text_query, output_format='rss'),
                        author='via hnapp',
                        url=query_url(text_query),
                        generator=('hnapp', app.config['HOST_NAME'], None))
        for item in items:
            feed.add(**item.feed_entry())
        return feed.get_response()

    # JSON
    elif output_format == 'json':
        feed = {}
        feed['has_more_items'] = has_more_items
        feed['query'] = text_query
        feed['items'] = [item.json_entry() for item in items]
        return flask.jsonify(**feed)

    # output_format defined but query is not
    # not supposed to happen, we checked for this above
    else:
        flask.abort(500)

def init(self):
    self.url = clean_url(self.url)
    url = self.url

    # Determine the type
    if 'bookmark.php?type=user' in url or url.startswith(headers['following']):
        type = 'following'
    elif 'bookmark.php' in url or url.startswith(headers['bookmark']) or '/bookmarks/' in url:
        type = 'bookmark'
    elif 'illust_id=' in url or url.startswith(headers['illust']) or '/artworks/' in url:
        type = 'illust'
    elif 'search.php' in url or url.startswith(headers['search']):
        type = 'search'
        order = query_url(url).get('order', ['date_d'])[0]  # date_d, date, popular_d, popular_male_d, popular_female_d
        scd = query_url(url).get('scd', [None])[0]  # 2019-09-27
        ecd = query_url(url).get('ecd', [None])[0]  # 2019-09-28
        blt = query_url(url).get('blt', [None])[0]  # 5000
        bgt = query_url(url).get('bgt', [None])[0]  # 9999
        type_ = query_url(url).get('type', [None])[0]  # None (all), illust, manga, ugoira
        self.info = {'order': order, 'scd': scd, 'ecd': ecd, 'blt': blt, 'bgt': bgt, 'type': type_}
    elif '/tags/' in url:
        type = 'search'
        order = query_url(url).get('order', ['date_d'])[0]
        scd = query_url(url).get('scd', [None])[0]
        ecd = query_url(url).get('ecd', [None])[0]
        blt = query_url(url).get('blt', [None])[0]
        bgt = query_url(url).get('bgt', [None])[0]
        type_ = query_url(url).get('type', [None])[0]  # None (all), illust, manga, ugoira
        if type_ is None:
            try:
                type_ = url.split('/tags/')[1].split('/')[1]
            except IndexError:
                type_ = None
        type_ = {'illustrations': 'illust'}.get(type_, type_)
        self.info = {'order': order, 'scd': scd, 'ecd': ecd, 'blt': blt, 'bgt': bgt, 'type': type_}
    elif ('id=' in url and 'mode=' not in url) or url.startswith(headers['user']) or 'pixiv.me' in url or '/users/' in url:
        type = 'user'
    else:
        self.Invalid(u'[pixiv] Cannot determine type: {}'.format(url))
        return 'stop'

    header = headers[type]

    if 'pixiv.net' in url or 'pixiv.me' in url:
        if not url.startswith('http://') and not url.startswith('https://'):
            url = u'https://' + url
        self.url = url
    else:
        url = url.replace('bmk_', '').replace('illust_', '').replace('pixiv_', '').replace('search_', '')
        if type == 'user':
            url = 'https://www.pixiv.net/member_illust.php?id={}'.format(url)
        elif type == 'bookmark':
            url = 'https://www.pixiv.net/bookmark.php?id={}'.format(url)
        elif type == 'illust':
            url = 'https://www.pixiv.net/member_illust.php?mode=medium&illust_id={}'.format(url)
        elif type == 'search':
            url = 'https://www.pixiv.net/search.php?s_mode=s_tag&word={}'.format(url)
            url = clean_url(url)
        else:
            self.Invalid('{}{}: ???'.format(header, url))
            return 'stop'
        self.url = url

    self.print_('PIXIV_TYPE: {}'.format(type))
    self.pixiv_type = type

    try:
        self.api = pixiv_auth.get_api()
        if 'error' in self.api.user_detail(11):
            self.api = pixiv_auth.get_api(force=True)
    except Exception as e:
        self.print_(print_error(e)[0])
        self.Invalid(tr_('로그인 실패: {}{}\n[옵션 - 설정 - 픽시브 설정 - 로그인] 에서 설정해주세요.').format(header, url))  # tr_: 'Login failed: {}{}\nPlease configure it in [Options - Settings - Pixiv settings - Login].'
        return 'stop'

def fix_url(cls, url):  # 2033
    qs = query_url(url)
    if 'v' in qs:
        url = url.split('?')[0] + '?v={}'.format(qs['v'][0])
    return url

def name(self):
    qs = query_url(self.url)
    name = qs['q'][0]
    if self._popular:
        name += ' - Popular'
    return name

def get_info(url, cw=None, depth=0, tags_add=None):
    print_ = get_print(cw)
    api = PixivAPI()
    info = {}
    imgs = []

    ugoira_ext = [None, '.gif', '.webp', '.png'][utils.ui_setting.ugoira_convert.currentIndex()] if utils.ui_setting else None
    format_ = compatstr(utils.ui_setting.pixivFormat.currentText()) if utils.ui_setting else 'id_ppage'

    max_pid = get_max_range(cw)

    if api.illust_id(url):  # Single post
        id_ = api.illust_id(url)
        data = api.illust(id_)
        login = '******' not in data
        if FORCE_LOGIN and not login: #
            raise errors.LoginRequired()
        if data['xRestrict'] and not login:
            raise errors.LoginRequired('R-18')
        info['artist'] = data['userName']
        info['artist_id'] = data['userId']
        info['raw_title'] = data['illustTitle']
        info['title'] = '{} (pixiv_illust_{})'.format(info['raw_title'], id_)
        info['create_date'] = parse_time(data['createDate'])
        tags_illust = set(tag['tag'] for tag in data['tags']['tags'])
        if tags_matched(tags_illust, tags_add, cw):
            if data['illustType'] == 2:  # ugoira
                data = api.ugoira_meta(id_)
                ugoira = {
                    'ext': ugoira_ext,
                    'delay': [frame['delay'] for frame in data['frames']],
                }
                img = Image(data['originalSrc'], url, id_, 0, format_, info, cw, ugoira=ugoira)
                imgs.append(img)
            else:
                data = api.pages(id_)
                for img in data:
                    img = Image(img['urls']['original'], url, id_, len(imgs), format_, info, cw)
                    imgs.append(img)
        else:
            print('tags mismatched')
    elif '/bookmarks/' in url or 'bookmark.php' in url:  # User bookmarks
        id_ = api.user_id(url)
        if id_ is None: #
            id_ = my_id()
        if id_ == my_id():
            rests = ['show', 'hide']
        else:
            rests = ['show']
        process_user(id_, info, api)
        info['title'] = '{} (pixiv_bmk_{})'.format(info['artist'], info['artist_id'])
        ids = []
        ids_set = set()
        for rest in rests:
            offset = 0
            while len(ids) < max_pid:
                data = api.bookmarks(id_, offset, rest=rest)
                c = 0
                for id in [work['id'] for work in data['works']]:
                    if id in ids_set:
                        continue
                    ids_set.add(id)
                    ids.append(id)
                    c += 1
                if not c:
                    break
                offset += LIMIT
                if depth == 0:
                    check_alive(cw)
        process_ids(ids, info, imgs, cw, depth)
    elif '/tags/' in url or 'search.php' in url:  # Search
        q = unquote(
            re.find(r'/tags/([^/]+)', url)
            or re.find('[?&]word=([^&]*)', url, err='no tags'))
        info['title'] = '{} (pixiv_search_{})'.format(q, q.replace(' ', '+'))
        qs = query_url(url)
        order = qs.get('order', ['date_d'])[0]
        mode = qs.get('mode', ['all'])[0]
        s_mode = qs.get('s_mode', ['s_tag_full'])[0]
        scd = qs.get('scd', [None])[0]
        ecd = qs.get('ecd', [None])[0]
        type_ = qs.get('type', ['all'])[0]
        wlt = qs.get('wlt', [None])[0]
        wgt = qs.get('wgt', [None])[0]
        hlt = qs.get('hlt', [None])[0]
        hgt = qs.get('hgt', [None])[0]
        blt = qs.get('blt', [None])[0]
        bgt = qs.get('bgt', [None])[0]
        ratio = qs.get('ratio', [None])[0]
        tool = qs.get('tool', [None])[0]
        logs = [
            'order: {}'.format(order),
            'mode: {}'.format(mode),
            's_mode: {}'.format(s_mode),
            'scd / ecd: {} / {}'.format(scd, ecd),
            'type: {}'.format(type_),
            'wlt / wgt: {} / {}'.format(wlt, wgt),
            'hlt / hgt: {} / {}'.format(hlt, hgt),
            'blt / bgt: {} / {}'.format(blt, bgt),
            'ratio: {}'.format(ratio),
            'tool: {}'.format(tool),
        ]
        print_('\n'.join(logs))
        ids = []
        ids_set = set()
        p = 1
        while len(ids) < max_pid:
            data = api.search(q, order, mode, p=p, s_mode=s_mode, scd=scd, ecd=ecd,
                              type_=type_, wlt=wlt, wgt=wgt, hlt=hlt, hgt=hgt,
                              blt=blt, bgt=bgt, ratio=ratio, tool=tool)
            c = 0
            for id in [
                    illust['id'] for illust in data['illustManga']['data']
                    if 'id' in illust
            ]:
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            p += 1
        process_ids(ids, info, imgs, cw, depth)
    elif 'bookmark_new_illust.php' in url or 'bookmark_new_illust_r18.php' in url:  # Newest works: Following
        r18 = 'bookmark_new_illust_r18.php' in url
        id_ = my_id()
        process_user(id_, info, api)
        info['title'] = '{} (pixiv_following_{}{})'.format(
            info['artist'], 'r18_' if r18 else '', info['artist_id'])
        ids = []
        ids_set = set()
        p = 1
        while len(ids) < max_pid:
            data = api.following(p, r18=r18)
            c = 0
            for id in data['page']['ids']:
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            p += 1
        process_ids(ids, info, imgs, cw, depth)
    elif api.user_id(url):  # User illusts
        m = re.search(r'/users/[0-9]+/([\w]+)/?([^\?#/]*)', url)
        type_ = {
            'illustrations': 'illusts',
            'manga': 'manga',
        }.get(m and m.groups()[0])
        if type_:
            types = [type_]
        else:
            types = ['illusts', 'manga']
        if m:
            tag = unquote(m.groups()[1]) or None
        else:
            tag = None
        print_('types: {}, tag: {}'.format(types, tag))
        id_ = api.user_id(url)
        process_user(id_, info, api)
        data = api.profile(id_)
        info['title'] = '{} (pixiv_{})'.format(info['artist'], info['artist_id'])
        ids = []
        for type_ in types:
            illusts = data[type_]
            if not illusts:
                continue
            ids += list(illusts.keys())
        ids = sorted(ids, key=int, reverse=True)
        if not ids:
            raise Exception('no imgs')
        process_ids(ids, info, imgs, cw, depth, tags_add=[tag] if tag else None)
    else:
        raise NotImplementedError()

    info['imgs'] = imgs[:max_pid]
    return info