def get_video(url, cw=None):
    print_ = get_print(cw)
    check_alive(cw)
    data = cw.data_
    version = data['version']
    print_('version: {}'.format(version))
    if version == '0.1':
        raise errors.OutdatedExtension()
    data = data['data']
    if not isinstance(data, bytes):
        data = data.encode('utf8')
    s = base64.b64decode(data).decode('utf8')
    urls = json.loads(s)
    print_(u'\n'.join(urls[:4]))

    referer_seg = 'auto' if 'referer=force' in urls[0] else None # 1718
    stream = M3u8_stream(url, urls=urls, n_thread=4, referer_seg=referer_seg)

    html = downloader.read_html(url)
    soup = Soup(html)
    url_thumb = soup.find('meta', {'property': 'og:image'}).attrs['content']
    title = soup.find('meta', {'property': 'og:title'}).attrs['content'].strip()

    video = Video(stream, url_thumb, url, title)
    return video
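# Added note, not part of the original source: a minimal sketch of the payload
# format get_video() expects in cw.data_['data'] -- a base64-encoded JSON array
# of segment URLs (the example URL below is hypothetical):
#
# >>> payload = base64.b64encode(json.dumps(['https://example.com/seg0.ts']).encode('utf8'))
# >>> json.loads(base64.b64decode(payload).decode('utf8'))
# ['https://example.com/seg0.ts']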
def process_ids(ids, info, imgs, cw, depth=0):
    print_ = get_print(cw)
    max_pid = get_max_range(cw)
    for i, id_illust in enumerate(ids):
        try:
            info_illust = get_info('https://www.pixiv.net/en/artworks/{}'.format(id_illust), cw, depth=depth + 1)
        except Exception as e:
            # '不明なエラーが発生しました' = "An unknown error occurred"; logout during extraction
            if depth == 0 and (e.args and e.args[0] == '不明なエラーが発生しました' or type(e) == errors.LoginRequired):
                raise e
            print_('process_ids error ({}):\n{}'.format(depth, print_error(e)[0]))
            continue
        imgs += info_illust['imgs']
        s = '{} {} - {}'.format(tr_('읽는 중...'), info['title'], len(imgs))
        if cw:
            cw.setTitle(s)
        else:
            print(s)
        if len(imgs) >= max_pid:
            break
        if depth == 0:
            check_alive(cw)
def get_imgs_channel(url, html=None, cw=None):
    print_ = get_print(cw)
    if html is None:
        html = downloader.read_html(url)
    info = get_info(url, html)

    # Range
    max_pid = get_max_range(cw)

    ids = set()
    imgs = []
    for p in range(1000):
        url_api = 'https://bcy.net/apiv3/user/selfPosts?uid={}'.format(info['uid'])
        if imgs:
            url_api += '&since={}'.format(imgs[-1].id)
        data_raw = downloader.read_html(url_api, url)
        data = json.loads(data_raw)['data']
        items = data['items']
        if not items:
            print('no items')
            break
        c = 0
        for item in items:
            check_alive(cw)
            id = item['item_detail']['item_id']
            if id in ids:
                print('duplicate')
                continue
            c += 1
            ids.add(id)
            url_single = u'https://bcy.net/item/detail/{}'.format(id)
            imgs_single = get_imgs(url_single, cw=cw)
            print_(str(id))
            for p, img in enumerate(imgs_single):
                img = Image(img._url, url_single, id, p)
                imgs.append(img)
            s = u'{} {} - {}'.format(tr_(u'읽는 중...'), info['artist'], min(len(imgs), max_pid))
            if cw:
                cw.setTitle(s)
            else:
                print(s)
            if len(imgs) >= max_pid:
                break
        if not c:
            print('not c')
            break
        if len(imgs) >= max_pid:
            print('over max_pid:', max_pid)
            break
    return imgs[:max_pid]
def read(self):
    file = None
    files = None
    title = None
    if '/users/' in self.url or '/user/' in self.url:
        type_ = 'videos'
        try:
            if self.url.split('/users/')[1].split('/')[1] == 'images':
                type_ = 'images'
        except:
            pass
        info = read_channel(self.url, type_, self.session, self.cw)
        title = info['title']
        urls = info['urls']
        if type_ == 'videos':
            files = [LazyFile(url, type_, self.session) for url in urls]
            file = self.process_playlist('[Channel] [{}] {}'.format(type_.capitalize(), title), files)
        elif type_ == 'images': #4499
            files = []
            for i, url in enumerate(urls):
                check_alive(self.cw)
                files += get_files(url, self.session, multi_post=True, cw=self.cw) #4728
                self.title = '{} {} - {} / {}'.format(tr_('읽는 중...'), title, i, len(urls))
            title = '[Channel] [{}] {}'.format(type_.capitalize(), title)
        else:
            raise NotImplementedError(type_)
    if file is None:
        if files is None:
            files = get_files(self.url, self.session, cw=self.cw)
        for file in files:
            self.urls.append(file.url)
        file = files[0]
        if file.type == 'youtube':
            raise errors.Invalid('[iwara] Youtube: {}'.format(self.url))
        if file.type == 'image':
            self.single = False
        title = title or file.title
    if not self.single:
        title = clean_title(title)
    self.title = title
    if file.thumb is not None:
        self.setIcon(file.thumb)
def read_channel(url, type_, session, cw=None):
    print_ = get_print(cw)
    html = read_html(url, session=session)
    soup = Soup(html)
    if soup.find('div', id='block-mainblocks-user-connect'):
        username = re.find(r'''/messages/new\?user=(.+)['"]''', html, err='no username')
    else:
        username = re.find(r'/users/([^/]+)', url, err='no username')
    print_('username: {}'.format(username))
    info = {}
    urls = []
    urls_set = set()
    for p in range(50):
        check_alive(cw)
        url = 'https://ecchi.iwara.tv/users/{}/{}?page={}'.format(username, type_, p)
        print_(url)
        html = read_html(url, session=session)
        soup = Soup(html)
        if p == 0:
            title = soup.find('h1', class_='page-title').text
            info['title'] = title.replace("'s videos", '').replace("'s images", '').strip()
        view = soup.find('div', class_='view-content')
        if view is None:
            break
        urls_new = []
        for div in view.findAll('div', class_='views-column'):
            href = div.find('a')['href']
            url_video = urljoin(url, href)
            if url_video in urls_set:
                continue
            urls_set.add(url_video)
            urls_new.append(url_video)
        if not urls_new:
            break
        urls += urls_new
    info['urls'] = urls
    return info
def get_imgs_all(info, title, session, cw=None):
    print_ = get_print(cw)
    pages = info['pages']
    pages = page_selector.filter(pages, cw)
    imgs = []
    for p, page in enumerate(pages):
        imgs_already = get_imgs_already('daumtoon', title, page, cw)
        if imgs_already:
            imgs += imgs_already
            continue
        try:
            imgs += get_imgs(page, session, cw)
        except NotPaidError:
            print_('Not paid: {}'.format(page.title)) #3314
            continue
        if cw is not None:
            cw.setTitle(tr_(u'읽는 중... {} / {} ({}/{})').format(title, page.title, p + 1, len(pages)))
        check_alive(cw)
    return imgs
def get_ids(q, popular, cw):
    check_alive(cw)
    if q is None:
        if popular:
            url_api = 'https://j.nozomi.la/index-Popular.nozomi'
        else:
            url_api = 'https://j.nozomi.la/index.nozomi'
    else:
        if popular:
            url_api = 'https://j.nozomi.la/nozomi/popular/{}-Popular.nozomi'.format(quote(q))
        else:
            url_api = 'https://j.nozomi.la/nozomi/{}.nozomi'.format(quote(q))
    print(url_api)
    f = BytesIO()
    downloader.download(url_api, referer='https://nozomi.la/', buffer=f)
    data = f.read()
    ids = []
    for i in range(0, len(data), 4):
        crop = data[i:i + 4]
        id = crop[0] * 16777216 + crop[1] * 65536 + crop[2] * 256 + crop[3]
        ids.append(id)
    return ids
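# Added note, not part of the original source: a *.nozomi index file is read as a
# flat sequence of 4-byte big-endian post IDs, so the byte arithmetic in
# get_ids() above is equivalent to this hypothetical helper (illustration only,
# never called by the code in this module):
def _decode_nozomi_ids(data):
    # 'data' is the raw bytes of a downloaded *.nozomi index file.
    return [int.from_bytes(data[i:i + 4], 'big') for i in range(0, len(data), 4)]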
def get_info(url, cw=None, depth=0):
    print_ = get_print(cw)
    api = PixivAPI()
    info = {}
    imgs = []

    if utils.ui_setting:
        ugoira_ext = [None, '.gif', '.webp', '.png'][utils.ui_setting.ugoira_convert.currentIndex()]
    else:
        ugoira_ext = None
    if utils.ui_setting:
        format_ = compatstr(utils.ui_setting.pixivFormat.currentText())
    else:
        format_ = 'id_ppage'

    max_pid = get_max_range(cw)

    if api.illust_id(url): # Single post
        id_ = api.illust_id(url)
        data = api.illust(id_)
        login = '******' not in data
        if FORCE_LOGIN and not login: #
            raise errors.LoginRequired()
        if data['xRestrict'] and not login:
            raise errors.LoginRequired('R-18')
        info['artist'] = data['userName']
        info['artist_id'] = data['userId']
        info['raw_title'] = data['illustTitle']
        info['title'] = '{} (pixiv_illust_{})'.format(info['raw_title'], id_)
        info['create_date'] = parse_time(data['createDate'])
        tags_illust = set(tag['tag'] for tag in data['tags']['tags'])

        if tags_matched(tags_illust, cw):
            if data['illustType'] == 2: # ugoira
                data = api.ugoira_meta(id_)
                ugoira = {
                    'ext': ugoira_ext,
                    'delay': [frame['delay'] for frame in data['frames']],
                }
                img = Image(data['originalSrc'], url, id_, 0, format_, info, cw, ugoira=ugoira)
                imgs.append(img)
            else:
                data = api.pages(id_)
                for img in data:
                    img = Image(img['urls']['original'], url, id_, len(imgs), format_, info, cw)
                    imgs.append(img)
        else:
            print('tags mismatched')
    elif '/bookmarks/' in url or 'bookmark.php' in url: # User bookmarks
        id_ = api.user_id(url)
        if id_ is None: #
            id_ = my_id()
        process_user(id_, info, api)
        info['title'] = '{} (pixiv_bmk_{})'.format(info['artist'], info['artist_id'])
        ids = []
        ids_set = set()
        offset = 0
        while len(ids) < max_pid:
            data = api.bookmarks(id_, offset)
            c = 0
            for id in [work['id'] for work in data['works']]:
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            offset += LIMIT
            if depth == 0:
                check_alive(cw)
        process_ids(ids[:max_pid], info, imgs, cw, depth)
    elif '/tags/' in url or 'search.php' in url: # Search
        q = unquote(re.find(r'/tags/([^/]+)', url) or re.find('[?&]word=([^&]*)', url, err='no tags'))
        info['title'] = '{} (pixiv_search_{})'.format(q, q.replace(' ', '+'))
        qs = query_url(url)
        order = qs.get('order', ['date_d'])[0]
        mode = qs.get('mode', ['all'])[0]
        ids = []
        ids_set = set()
        p = 1
        while len(ids) < max_pid:
            data = api.search(q, order, mode, p=p)
            c = 0
            for id in [illust['id'] for illust in data['illustManga']['data'] if 'id' in illust]:
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            p += 1
        process_ids(ids[:max_pid], info, imgs, cw, depth)
    elif 'bookmark_new_illust.php' in url or 'bookmark_new_illust_r18.php' in url: # Newest works: Following
        r18 = 'bookmark_new_illust_r18.php' in url
        id_ = my_id()
        process_user(id_, info, api)
        info['title'] = '{} (pixiv_following_{}{})'.format(info['artist'], 'r18_' if r18 else '', info['artist_id'])
        ids = []
        ids_set = set()
        p = 1
        while len(ids) < max_pid:
            c = 0
            for id in api.following(p, r18=r18):
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            p += 1
        process_ids(ids[:max_pid], info, imgs, cw, depth)
    elif api.user_id(url): # User illusts
        id_ = api.user_id(url)
        process_user(id_, info, api)
        data = api.profile(id_)
        info['title'] = '{} (pixiv_{})'.format(info['artist'], info['artist_id'])
        ids = []
        for illusts in [data['illusts'], data['manga']]:
            if not illusts:
                continue
            ids += list(illusts.keys())
        ids = sorted(ids, key=int, reverse=True)
        process_ids(ids[:max_pid], info, imgs, cw, depth)
    else:
        raise NotImplementedError()
    info['imgs'] = imgs[:max_pid]
    return info
def wait(cw):
    check_alive(cw)
def process_ids(ids, info, imgs, cw, depth=0, tags_add=None):
    print_ = get_print(cw)
    max_pid = get_max_range(cw)

    class Thread(threading.Thread):
        alive = True
        rem = 0

        def __init__(self, queue):
            super().__init__(daemon=True)
            self.queue = queue

        @classmethod
        @lock
        def add_rem(cls, x):
            cls.rem += x

        def run(self):
            # Workers pull (illust id, shared result list, slot index) tuples
            # from the deque and fill their slot with the extracted imgs.
            while self.alive:
                try:
                    id_, res, i = self.queue.popleft()
                except Exception as e:
                    sleep(.1)
                    continue
                try:
                    info_illust = get_info('https://www.pixiv.net/en/artworks/{}'.format(id_), cw, depth=depth + 1, tags_add=tags_add)
                    res[i] = info_illust['imgs']
                except Exception as e:
                    # '不明なエラーが発生しました' = "An unknown error occurred"; logout during extraction
                    if depth == 0 and (e.args and e.args[0] == '不明なエラーが発生しました' or type(e) == errors.LoginRequired):
                        res[i] = e
                    print_('process_ids error (id: {}, d:{}):\n{}'.format(id_, depth, print_error(e)[0]))
                finally:
                    Thread.add_rem(-1)

    queue = deque()
    n, step = Downloader_pixiv.STEP
    print_('{} / {}'.format(n, step))
    ts = []
    for i in range(n):
        t = Thread(queue)
        t.start()
        ts.append(t)
    for i in range(0, len(ids), step):
        res = [[]] * step
        for j, id_illust in enumerate(ids[i:i + step]):
            queue.append((id_illust, res, j))
            Thread.add_rem(1)
        while Thread.rem:
            sleep(.001, cw)
        for imgs_ in res:
            if isinstance(imgs_, Exception):
                raise imgs_
            imgs += imgs_
        s = '{} {} - {}'.format(tr_('읽는 중...'), info['title'], len(imgs))
        if cw:
            cw.setTitle(s)
        else:
            print(s)
        if len(imgs) >= max_pid:
            break
        if depth == 0:
            check_alive(cw)
    for t in ts:
        t.alive = False
def get_info(url, cw=None, depth=0, tags_add=None):
    print_ = get_print(cw)
    api = PixivAPI()
    info = {}
    imgs = []

    ugoira_ext = [None, '.gif', '.webp', '.png'][utils.ui_setting.ugoira_convert.currentIndex()] if utils.ui_setting else None
    format_ = compatstr(utils.ui_setting.pixivFormat.currentText()) if utils.ui_setting else 'id_ppage'

    max_pid = get_max_range(cw)

    if api.illust_id(url): # Single post
        id_ = api.illust_id(url)
        data = api.illust(id_)
        login = '******' not in data
        if FORCE_LOGIN and not login: #
            raise errors.LoginRequired()
        if data['xRestrict'] and not login:
            raise errors.LoginRequired('R-18')
        info['artist'] = data['userName']
        info['artist_id'] = data['userId']
        info['raw_title'] = data['illustTitle']
        info['title'] = '{} (pixiv_illust_{})'.format(info['raw_title'], id_)
        info['create_date'] = parse_time(data['createDate'])
        tags_illust = set(tag['tag'] for tag in data['tags']['tags'])

        if tags_matched(tags_illust, tags_add, cw):
            if data['illustType'] == 2: # ugoira
                data = api.ugoira_meta(id_)
                ugoira = {
                    'ext': ugoira_ext,
                    'delay': [frame['delay'] for frame in data['frames']],
                }
                img = Image(data['originalSrc'], url, id_, 0, format_, info, cw, ugoira=ugoira)
                imgs.append(img)
            else:
                data = api.pages(id_)
                for img in data:
                    img = Image(img['urls']['original'], url, id_, len(imgs), format_, info, cw)
                    imgs.append(img)
        else:
            print('tags mismatched')
    elif '/bookmarks/' in url or 'bookmark.php' in url: # User bookmarks
        id_ = api.user_id(url)
        if id_ is None: #
            id_ = my_id()
        if id_ == my_id():
            rests = ['show', 'hide']
        else:
            rests = ['show']
        process_user(id_, info, api)
        info['title'] = '{} (pixiv_bmk_{})'.format(info['artist'], info['artist_id'])
        ids = []
        ids_set = set()
        for rest in rests:
            offset = 0
            while len(ids) < max_pid:
                data = api.bookmarks(id_, offset, rest=rest)
                c = 0
                for id in [work['id'] for work in data['works']]:
                    if id in ids_set:
                        continue
                    ids_set.add(id)
                    ids.append(id)
                    c += 1
                if not c:
                    break
                offset += LIMIT
                if depth == 0:
                    check_alive(cw)
        process_ids(ids, info, imgs, cw, depth)
    elif '/tags/' in url or 'search.php' in url: # Search
        q = unquote(re.find(r'/tags/([^/]+)', url) or re.find('[?&]word=([^&]*)', url, err='no tags'))
        info['title'] = '{} (pixiv_search_{})'.format(q, q.replace(' ', '+'))
        qs = query_url(url)
        order = qs.get('order', ['date_d'])[0]
        mode = qs.get('mode', ['all'])[0]
        s_mode = qs.get('s_mode', ['s_tag_full'])[0]
        scd = qs.get('scd', [None])[0]
        ecd = qs.get('ecd', [None])[0]
        type_ = qs.get('type', ['all'])[0]
        wlt = qs.get('wlt', [None])[0]
        wgt = qs.get('wgt', [None])[0]
        hlt = qs.get('hlt', [None])[0]
        hgt = qs.get('hgt', [None])[0]
        blt = qs.get('blt', [None])[0]
        bgt = qs.get('bgt', [None])[0]
        ratio = qs.get('ratio', [None])[0]
        tool = qs.get('tool', [None])[0]
        logs = [
            'order: {}'.format(order),
            'mode: {}'.format(mode),
            's_mode: {}'.format(s_mode),
            'scd / ecd: {} / {}'.format(scd, ecd),
            'type: {}'.format(type_),
            'wlt / wgt: {} / {}'.format(wlt, wgt),
            'hlt / hgt: {} / {}'.format(hlt, hgt),
            'blt / bgt: {} / {}'.format(blt, bgt),
            'ratio: {}'.format(ratio),
            'tool: {}'.format(tool),
        ]
        print_('\n'.join(logs))
        ids = []
        ids_set = set()
        p = 1
        while len(ids) < max_pid:
            data = api.search(q, order, mode, p=p, s_mode=s_mode, scd=scd, ecd=ecd, type_=type_, wlt=wlt, wgt=wgt, hlt=hlt, hgt=hgt, blt=blt, bgt=bgt, ratio=ratio, tool=tool)
            c = 0
            for id in [illust['id'] for illust in data['illustManga']['data'] if 'id' in illust]:
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            p += 1
        process_ids(ids, info, imgs, cw, depth)
    elif 'bookmark_new_illust.php' in url or 'bookmark_new_illust_r18.php' in url: # Newest works: Following
        r18 = 'bookmark_new_illust_r18.php' in url
        id_ = my_id()
        process_user(id_, info, api)
        info['title'] = '{} (pixiv_following_{}{})'.format(info['artist'], 'r18_' if r18 else '', info['artist_id'])
        ids = []
        ids_set = set()
        p = 1
        while len(ids) < max_pid:
            data = api.following(p, r18=r18)
            c = 0
            for id in data['page']['ids']:
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            p += 1
        process_ids(ids, info, imgs, cw, depth)
    elif api.user_id(url): # User illusts
        m = re.search(r'/users/[0-9]+/([\w]+)/?([^\?#/]*)', url)
        type_ = {'illustrations': 'illusts', 'manga': 'manga'}.get(m and m.groups()[0])
        if type_:
            types = [type_]
        else:
            types = ['illusts', 'manga']
        if m:
            tag = unquote(m.groups()[1]) or None
        else:
            tag = None
        print_('types: {}, tag: {}'.format(types, tag))
        id_ = api.user_id(url)
        process_user(id_, info, api)
        data = api.profile(id_)
        info['title'] = '{} (pixiv_{})'.format(info['artist'], info['artist_id'])
        ids = []
        for type_ in types:
            illusts = data[type_]
            if not illusts:
                continue
            ids += list(illusts.keys())
        ids = sorted(ids, key=int, reverse=True)
        if not ids:
            raise Exception('no imgs')
        process_ids(ids, info, imgs, cw, depth, tags_add=[tag] if tag else None)
    else:
        raise NotImplementedError()
    info['imgs'] = imgs[:max_pid]
    return info
def get_imgs_more(username, session, title, types, n=None, format='[%y-%m-%d] id_ppage', cw=None, mode='media', method='tab', imgs=None):
    print_ = get_print(cw)
    imgs = imgs or []
    print_('imgs: {}, types: {}'.format(len(imgs), ', '.join(types)))

    artist, username = get_artist_username(username, session, cw) #

    # Range
    n = max(n or 0, get_max_range(cw))

    ids_set = set(img.id for img in imgs)

    count_no_tweets = 0
    count_no_imgs = 0

    while check_alive(cw) or len(imgs) < n:
        if options.get('experimental') or count_no_tweets: #2687, #3392
            filter_ = ''
        else:
            filter_ = ' filter:media'
        cache_guest_token = bool(count_no_tweets)
        if ids_set:
            max_id = min(ids_set) - 1
            q = 'from:{} max_id:{} exclude:retweets{} -filter:periscope'.format(username, max_id, filter_)
        else:
            q = 'from:{} exclude:retweets{} -filter:periscope'.format(username, filter_)
        print(q)

        tweets = []
        for tweet in list(TwitterAPI(session, cw, cache_guest_token).search(q)):
            id = int(tweet['id'])
            if id in ids_set:
                print_('duplicate: {}'.format(id))
                continue
            ids_set.add(id)
            tweets.append(tweet)

        if tweets:
            exists_more_imgs = False
            for tweet in tweets:
                imgs_tweet = get_imgs_from_tweet(tweet, session, types, format, cw)
                if imgs_tweet:
                    imgs += imgs_tweet
                    exists_more_imgs = True
            if exists_more_imgs:
                count_no_imgs = 0
            else:
                count_no_imgs += 1
                if count_no_imgs >= RETRY_MORE_IMGS: #4130
                    break
            count_no_tweets = 0
        else:
            count_no_tweets += 1
            change_ua(session)
            if count_no_tweets >= RETRY_MORE:
                break
            print_('retry... {}'.format(count_no_tweets))
            continue

        msg = '{} {} (@{}) - {}'.format(tr_('읽는 중...'), artist, username, len(imgs))
        if cw:
            cw.setTitle(msg)
        else:
            print(msg)

    return imgs
def get_imgs(username, session, title, types, n=0, format='[%y-%m-%d] id_ppage', cw=None):
    print_ = get_print(cw)

    # Range
    n = max(n, get_max_range(cw))

    # 2303
    ids = set()
    names = dict()
    dir_ = os.path.join(get_outdir('twitter'), title)
    if os.path.isdir(dir_) and cw:
        for name in cw.names_old:
            name = os.path.basename(name)
            id_ = re.find('([0-9]+)_p', name)
            if id_ is None:
                continue
            if get_ext(name).lower() == '.mp4':
                type_ = 'video'
            else:
                type_ = 'img'
            if type_ not in types:
                continue
            id_ = int(id_)
            ids.add(id_)
            if id_ in names:
                names[id_].append(name)
            else:
                names[id_] = [name]
    ids_sure = sorted(ids)[:-100]
    max_id = max(ids_sure) if ids_sure else 0 #3201

    # 2303
    imgs_old = []
    for id_ in sorted(ids, reverse=True):
        for p, file in enumerate(sorted(os.path.join(dir_, name) for name in names[id_])):
            img = Image(file, '', id_, 0, p, format, cw, False)
            img.url = LazyUrl_twitter(None, lambda _: file, img)
            img.filename = os.path.basename(file)
            imgs_old.append(img)

    imgs_new = []
    enough = False
    c_old = 0
    for tweet in TwitterAPI(session, cw).timeline_media(username):
        check_alive(cw)
        id_ = int(tweet['id_str'])
        if id_ < max_id:
            print_('enough')
            enough = True
            break
        if id_ in ids:
            print_('duplicate: {}'.format(id_))
            c_old += 1
            continue
        ids.add(id_)
        imgs_new += get_imgs_from_tweet(tweet, session, types, format, cw)
        if len(imgs_new) + c_old >= n: #3201
            break
        msg = '{} {} - {}'.format(tr_('읽는 중...'), title, len(imgs_new))
        if cw:
            cw.setTitle(msg)
        else:
            print(msg)

    if not enough and not imgs_new and c_old == 0:
        raise Exception('no imgs')

    imgs = sorted(imgs_old + imgs_new, key=lambda img: img.id, reverse=True)
    if len(imgs) < n:
        imgs = get_imgs_more(username, session, title, types, n, format, cw, imgs=imgs)

    return imgs[:n]
def get_imgs(url, title=None, range_=None, cw=None):
    if 'donmai.us/artists' in url:
        raise NotImplementedError('Not Implemented')
    if 'donmai.us/posts/' in url:
        raise NotImplementedError('Not Implemented')
    print_ = get_print(cw)

    # Range
    max_pid = get_max_range(cw)

    if range_ is None:
        range_ = range(1, 1001)
    print(range_)

    imgs = []
    i = 0
    empty_count = 0
    empty_count_global = 0
    url_imgs = set()
    while i < len(range_):
        check_alive(cw)
        p = range_[i]
        url = setPage(url, p)
        print_(url)
        soup = read_soup(url, cw)
        articles = soup.findAll('article')
        if articles:
            empty_count_global = 0
        else:
            empty_count += 1
            if empty_count < 4:
                s = 'empty page; retry... {}'.format(p)
                print_(s)
                continue
            else:
                empty_count = 0
                empty_count_global += 1
        if empty_count_global >= 6:
            break
        for article in articles:
            id = article.attrs['data-id']
            #url_img = article.attrs['data-file-url'].strip()
            url_img = urljoin(url, article.find('a', class_='post-preview-link')['href']) #4160
            #print(url_img)
            if url_img not in url_imgs:
                url_imgs.add(url_img)
                img = Image(id, url_img, cw)
                imgs.append(img)
        if len(imgs) >= max_pid:
            break
        if cw is not None:
            cw.setTitle('{} {} - {}'.format(tr_('읽는 중...'), title, len(imgs)))
        i += 1
    return imgs[:max_pid]
def read_soup(url, cw):
    check_alive(cw)
    wait(cw)
    return downloader.read_soup(url)
def get_imgs(url, n_max=2000, title=None, cw=None, session=None):
    print_ = get_print(cw)

    for try_ in range(4):
        try:
            html = read_html(url, session, cw)
            m = re.search('"edge_owner_to_timeline_media":{"count":([0-9]+)', html)
            if m is None:
                raise Exception('Invalid page')
            break
        except Exception as e:
            e_ = e
            print_(print_error(e)[0])
    else:
        raise e_

    n = int(m.groups()[0])
    n = min(n, n_max)

    data = get_sd(url, html=html, cw=cw)

    uploader_id = data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
    csrf_token = data['config']['csrf_token']
    #session.cookies.set(name='ig_pr', value='1', path='/', domain='.instagram.com')

    cursor = ''
    edges = []
    bad = 0
    while True:
        check_alive(cw)

        variables = {
            'id': uploader_id,
            'first': 12,
        }
        if cursor:
            variables['after'] = cursor
        #print_(variables)#

        media = None
        try:
            j = get_query('003056d32c2554def87228bc3fd9668a', variables, session, cw)
            media = j['data']['user']['edge_owner_to_timeline_media']
            sleep(2) #
        except Exception as e:
            if bad > 10:
                raise Exception('no media')
            else:
                print_(u'no media.. retry... ({}) {}'.format(bad + 1, print_error(e)[0]))
                sleep(12 * bad, cw)
            bad += 1
            continue
        bad = 0

        edges_new = media.get('edges')
        if not edges_new or not isinstance(edges_new, list):
            print('no edges_new')
            break

        edges += edges_new

        s = u'{} {} ({}/{})'.format(tr_(u'읽는 중...'), title, len(edges), n)
        if cw is not None:
            cw.setTitle(s)
            if not cw.alive:
                return []
        else:
            print(s)

        if len(edges) >= n:
            break

        page_info = media.get('page_info')
        if not page_info:
            break
        if not page_info.get('has_next_page'):
            break
        cursor = page_info.get('end_cursor')
        if not cursor:
            break

    if len(edges) <= n / 2:
        raise Exception(u'Too short: {} / {}'.format(len(edges), n))

    imgs = []
    for edge in edges:
        node = edge['node']
        type = node['__typename']
        id = node['shortcode']
        url = u'https://www.instagram.com/p/{}/'.format(id)
##        if type in ['GraphVideo', 'GraphImage']:
##            single = True
##        else:
##            single = False
        for img in Node(url, session=session, cw=cw, media=node).imgs:
            imgs.append(img)
        if len(imgs) >= n_max:
            break

    return imgs