def get_videos(url, cw=None):
    print_ = get_print(cw)
    info = {}
    user_id = re.find(r'twitch.tv/([^/?]+)', url, err='no user_id')
    print(user_id)
    session = Session()
    r = session.get(url)
    # The GQL request headers are embedded in the page as a JS object literal
    s = cut_pair(re.find(r'headers *: *({.*)', r.text, err='no headers'))
    print(s)
    headers = json_loads(s)
    payload = [{
        'operationName': 'ClipsCards__User',
        'variables': {
            'login': user_id,
            'limit': 20,
            'criteria': {
                'filter': 'ALL_TIME'
            }
        },
        'extensions': {
            'persistedQuery': {
                'version': 1,
                'sha256Hash': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777'
            }
        },
    }]
    videos = []
    cursor = None
    cursor_new = None
    while True:
        if cursor:
            payload[0]['variables']['cursor'] = cursor
        r = session.post('https://gql.twitch.tv/gql', json=payload, headers=headers)
        #print(r)
        data = r.json()
        for edge in data[0]['data']['user']['clips']['edges']:
            url_video = edge['node']['url']
            info['name'] = edge['node']['broadcaster']['displayName']
            video = Video(url_video)
            video.id = int(edge['node']['id'])
            videos.append(video)
            cursor_new = edge['cursor']
        print_('videos: {} / cursor: {}'.format(len(videos), cursor))
        if cursor == cursor_new:
            print_('same cursor')
            break
        if cursor_new is None:
            break
        cursor = cursor_new
    if not videos:
        raise Exception('no videos')
    info['videos'] = sorted(videos, key=lambda video: video.id, reverse=True)
    return info

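# --- illustration (not part of the extractor) ---
# `cut_pair` above is assumed to trim the regex match '{...' down to one
# balanced JSON object so json_loads can parse it. A minimal sketch of such a
# helper, assuming braces never occur inside string values (`cut_pair_sketch`
# is a hypothetical name; the project's real cut_pair may differ):
def cut_pair_sketch(s, open_='{', close='}'):
    depth = 0
    for i, c in enumerate(s):
        if c == open_:
            depth += 1
        elif c == close:
            depth -= 1
            if depth == 0:
                return s[:i + 1]  # prefix spanning one balanced pair
    raise ValueError('unbalanced pair')

# cut_pair_sketch('{"a": {"b": 1}}, foo: bar') -> '{"a": {"b": 1}}'
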
def get_imgs(id, title, session, type=None, cw=None):
    print_ = get_print(cw)
    if type is None:
        type = 'projects'
    referer = 'https://www.artstation.com/{}'.format(id)
    html = downloader.read_html(referer, session=session)
    print(session.cookies.keys())

    datas = []
    p = 1
    while p < 1000:
        url = 'https://www.artstation.com/users/{}/{}.json?page={}'.format(id, type, p)
        print(url)
        for try_ in range(4):
            try:
                html = downloader.read_html(url, session=session, referer=referer)
                break
            except Exception as e:
                print(e)
                e_ = e
        else:
            raise e_  # all 4 attempts failed; re-raise the last error
        j = json.loads(html)
        data = j['data']
        if not data:
            break
        datas += data
        if cw:
            if not cw.alive:
                return []
            cw.setTitle('{} {} - {}'.format(tr_('페이지 읽는 중...'), title, len(datas)))  # 'Reading pages...'
        else:
            print(len(datas))
        p += 1

    imgs = []
    i = 0
    while i < len(datas):
        data = datas[i]
        date = data['created_at'][2:10]
        post_url = data['permalink']
        print('post_url', post_url)
        id_art = get_id_art(post_url)
        imgs += get_imgs_page(id_art, session, date=date, cw=cw)
        if cw:
            if not cw.alive:
                return []
            cw.setTitle('{} {} - {}'.format(tr_('이미지 읽는 중...'), title, len(imgs)))  # 'Reading images...'
        else:
            print(len(imgs))
        i += 1

    return imgs

def get_imgs_page(id_art, session, date=None, cw=None):
    print_ = get_print(cw)
    url_json = 'https://www.artstation.com/projects/{}.json'.format(id_art)
    post_url = 'https://www.artstation.com/artwork/{}'.format(id_art)
    try:
        html = downloader.read_html(url_json, session=session, referer=post_url)
        data = json.loads(html)
        imgs_ = data['assets']
    except Exception as e:
        print_(print_error(e)[-1])
        return []

    if date is None:
        date = data['created_at'][2:10]

    imgs = []
    for page, img in enumerate(imgs_):
        if not img['has_image']:
            print('no img')
            continue
        url = None
        video = None
        embed = img.get('player_embedded')
        if embed:
            soup = Soup(embed)
            url_embed = soup.find('iframe').attrs['src']
            print_('embed: {}'.format(url_embed))
            try:
                html = downloader.read_html(url_embed, session=session, referer=post_url)
                soup = Soup(html)
                url = soup.find('video').find('source').attrs['src']
            except Exception:
                pass
            if not url:
                try:
                    url = soup.find('link', {'rel': 'canonical'}).attrs['href']
                    print_('YouTube: {}'.format(url))
                    raise Exception('YouTube')
##                    from extractor import youtube_downloader
##                    video = youtube_downloader.Video(url, cw=cw)
                except Exception as e:
                    print(e)
                    url = None
        if not url:
            url = img['image_url']

        if video:
            img = video
        else:
            img = Image(post_url, date, url, page)
            img.data = data
        imgs.append(img)

    return imgs

def get_imgs_from_illust(illust, api=None, types={'illust', 'manga', 'ugoira'},
                         format=None, format_name=None, dir='', print_=None, cw=None):
    print('get_imgs_from_illust', api, types, format, format_name, dir)
    print_ = get_print(cw)
    if api is None:
        api = pixiv_auth.get_api()
    if types is not None and illust.get('type', 'illust') not in types:
        return []
    imgs = []
    if illust.type == 'ugoira':
        sleep(0.2)
        for try_ in range(N_TRY):
            print_('read ugoira... {}'.format(illust.id))
            try:
                ugoira_data = api.ugoira_metadata(illust.id, req_auth=True)
                error = ugoira_data.get('error')
                if error:
                    raise PixivError(error)
                break
            except PixivError as e:
                api = e.api
                print_(e)
                msg = error.get('user_message', '')
                if u'公開制限エラー' in msg:  # 'publication restriction error'
                    print_('invalid ugoira; ignore')
                    return []
                if u'該当作品の公開レベルにより閲覧できません' in msg:  # 'not viewable at this publication level'
                    print_('invalid ugoira (2); ignore')
                    return []
                if try_ < N_TRY - 1:
                    print_('retry...')
                    sleep(SLEEP, cw)
                else:
                    raise
        ugoira_data = ugoira_data.ugoira_metadata
        url = ugoira_data.zip_urls.medium.replace('600x600', '1920x1080')
        img = Img(illust, url, ugoira_data=ugoira_data, format_name=format_name)
        if format is not None:
            filename = os.path.join(dir, img.filename)
            filename = os.path.splitext(filename)[0] + '.' + format
            filename_old = os.path.join(dir, '{}_ugoira1920x1080.{}'.format(img.id, format))
            if os.path.isfile(filename_old) and not os.path.isfile(filename):
                print_(u'rename: {} -> {}'.format(os.path.basename(filename_old), os.path.basename(filename)))
                os.rename(filename_old, filename)
            if os.path.isfile(filename):
                print_(u'skip ugoira: {}'.format(filename))
                img = Img(illust, filename, ugoira_data=ugoira_data, format_name=format_name)
        imgs.append(img)
    elif illust.page_count == 1:
        img = Img(illust, illust.meta_single_page.original_image_url, format_name=format_name)
        imgs.append(img)
    else:
        pages = illust.meta_pages
        for page in pages:
            img = Img(illust, page.image_urls.original, format_name=format_name)
            imgs.append(img)
    return imgs

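# --- illustration (not part of the extractor) ---
# The ugoira branch above retries api.ugoira_metadata() up to N_TRY times,
# sleeping between attempts and re-raising on the last failure. The same
# retry shape in isolation (a sketch; all names here are made up):
def retry_sketch(fn, n_try=3, delay=1.0):
    import time
    for try_ in range(n_try):
        try:
            return fn()
        except Exception:
            if try_ < n_try - 1:
                time.sleep(delay)  # back off before the next attempt
            else:
                raise  # attempts exhausted; propagate the last error
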
def get_imgs(url, title=None, cw=None):
    url = clean_url(url)
    if 's=view' in url and 'page=favorites' not in url:
        raise NotImplementedError('Not Implemented')
    tags = get_tags(url)
    tags = quote(tags, safe='/')
    tags = tags.replace('%20', '+')
    url = 'https://gelbooru.com/index.php?page=post&s=list&tags={}'.format(tags)

    print_ = get_print(cw)

    # Range
    max_pid = get_max_range(cw)

    imgs = []
    ids = set()
    count_no_imgs = 0
    for p in range(500): #1017
        url = setPage(url, len(ids))
        print_(url)
        html = downloader.read_html(url)

        soup = Soup(html)
        posts = soup.findAll(class_='thumbnail-preview')
        imgs_new = []
        for post in posts:
            id_ = int(re.find('[0-9]+', post.find('a')['id'], err='no id'))
            if id_ in ids:
                print('duplicate:', id_)
                continue
            ids.add(id_)
            url_img = urljoin(url, post.find('a')['href'])
            img = Image(id_, url_img)
            imgs_new.append(img)
        if imgs_new:
            imgs += imgs_new
            count_no_imgs = 0
        else:
            print('no imgs')
            count_no_imgs += 1
            if count_no_imgs > 1:
                print('break')
                break
        if len(imgs) >= max_pid:
            break

        if cw is not None:
            if not cw.alive:
                break
            cw.setTitle(u'{} {} - {}'.format(tr_(u'읽는 중...'), title, len(imgs)))  # 'Reading...'

    if not imgs:
        raise Exception('no imgs')

    return imgs

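# --- illustration (not part of the extractor) ---
# The loop above only stops after two empty listing pages in a row,
# presumably to ride over an occasional empty page in the listing. The
# guard in isolation (a sketch; `fetch_page` stands in for the HTML scrape):
def paginate_until_empty_sketch(fetch_page, max_empty=2):
    items = []
    empty = 0
    page = 0
    while empty < max_empty:
        batch = fetch_page(page)
        if batch:
            items += batch
            empty = 0  # reset on any progress
        else:
            empty += 1  # tolerate isolated empty pages
        page += 1
    return items
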
def extract(name, html, cw=None):
    print_ = get_print(cw)
    value = re.find(r'''{} *= *['"](.*?)['"]'''.format(name), html)
    if value is None:
        # fall back to a JSON array literal, e.g. name = ["a", "b"]
        value = json.loads(re.find(r'''{} *= *(\[.*?\])'''.format(name), html))
    print_('{}: {}'.format(name, value))
    if value is None:
        raise Exception('No {}'.format(name))
    return value

def print_streams(streams, cw):
    print_ = get_print(cw)

    for stream in streams:
        print_(u'[{}][{}fps][{}{}][{}] {} [{} / {}] ─ {}'.format(
            stream.resolution, stream.fps, stream.abr_str,
            '(fixed)' if stream.abr_fixed else '', stream.tbr, stream.subtype,
            stream.video_codec, stream.audio_codec, stream.format))
    print_('')

def get_sd(url, session=None, html=None, cw=None, wait=True):
    print_ = get_print(cw)

    if html:
        soup = Soup(html)
        check_error(soup, cw, wait)
        for script in soup.findAll('script'):
            j = get_j(script)
            if j:
                break
        else:
            raise Exception('no _sharedData!!')
    else:
        for try_ in range(4):
            _wait(cw)
            html = read_html(url, session, cw)
            soup = Soup(html)
            check_error(soup, cw, wait)
            for script in soup.findAll('script'):
                j = get_j(script)
                if j:
                    break
            else:
                continue  # no _sharedData in this response; retry
            break
        else:
            raise Exception('no _sharedData')

    for script in soup.findAll('script'):
        s = script.string
        if s and 'window.__additionalDataLoaded(' in s:
            s = cut_pair(s)
            j_add = json.loads(s)
            try:
                j['entry_data']['PostPage'][0].update(j_add)
            except:
                j['entry_data']['ProfilePage'][0].update(j_add) #2900

    # Challenge
    challenge = j['entry_data'].get('Challenge')
    if challenge:
        try:
            for cont in challenge[0]['extraData']['content']:
                title = cont.get('title')
                if title:
                    break
            else:
                raise Exception('no title')
        except:
            title = 'Err'
        raise errors.LoginRequired(title)

    # LoginAndSignupPage
    login = j['entry_data'].get('LoginAndSignupPage')
    if login:
        raise errors.LoginRequired()

    return j

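# --- illustration (not part of the extractor) ---
# get_sd() leans on Python's for/else twice: the inner `else: continue` fires
# only when no <script> produced a _sharedData JSON (retry the outer loop),
# and the outer `else: raise` fires only when all four tries ran out. The
# construct in isolation:
def first_match_sketch(items, pred):
    for item in items:
        if pred(item):
            break  # found one; the else block is skipped
    else:
        raise LookupError('no match')  # loop finished without break
    return item

# first_match_sketch([1, 3, 4], lambda x: x % 2 == 0) -> 4
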
def get(self, url):
    print_ = get_print(self.cw)
    if self._url:
        return self._url

    ydl = ytdl.YoutubeDL(cw=self.cw)
    try:
        info = ydl.extract_info(url)
    except Exception as e:
        # fall back to the extractor's private API before giving up
        ex = type(ytdl.get_extractor(url))(ydl)
        _download_info = getattr(ex, '_download_info', None)
        if _download_info is not None:
            vod_id = ex._match_id(url)
            info = _download_info(vod_id)
            print_(info)
        else:
            if 'HTTPError 403' in str(e):
                raise errors.LoginRequired()
            raise

    def print_video(video):
        print_('[{}] [{}] [{}] {}'.format(video['format_id'], video.get('height'), video.get('tbr'), video['url']))

    videos = [video for video in info['formats'] if video.get('height')]
    videos = sorted(videos, key=lambda video: (video.get('height', 0), video.get('tbr', 0)), reverse=True)

    for video in videos:
        print_video(video)

    for video in videos:
        if video.get('height', 0) <= get_resolution(): #3723
            video_best = video
            break
    else:
        video_best = videos[-1]
    print_video(video_best)

    video = video_best['url']
    ext = get_ext(video)
    self.title = info['title']
    id = info['display_id']

    if ext.lower() == '.m3u8':
        video = M3u8_stream(video, n_thread=4, alter=alter)
        ext = '.mp4'
    self.filename = format_filename(self.title, id, ext)

    self.url_thumb = info['thumbnail']
    self.thumb = BytesIO()
    downloader.download(self.url_thumb, buffer=self.thumb)

    self._url = video
    return self._url

def get_files(url, session, multi_post=False, cw=None):
    print_ = get_print(cw)
    html = read_html(url, session=session)
    soup = Soup(html)
    h = soup.find('h1', class_='title')
    content = h.parent.parent.parent
    title = h.text.strip()
    youtube = content.find('div', class_='embedded-video')
    video = content.find('video')
    if youtube:
        type = 'youtube'
    elif video:
        type = 'video'
    else:
        type = 'image'
    print_('type: {}'.format(type))
    files = []
    if type == 'image':
        urls = set()
        for img in content.findAll('img'):
            img = urljoin(url, img.parent.attrs['href'])
            if '/files/' not in img:
                continue
            if img in urls:
                print('duplicate')
                continue
            urls.add(img)
            file = File(type, img, title, url, len(files), multi_post=multi_post)
            files.append(file)
    elif type == 'youtube':
        src = urljoin(url, youtube.find('iframe').attrs['src'])
        file = File(type, src, title, url)
        files.append(file)
    elif type == 'video':
        url_thumb = urljoin(url, video.attrs['poster'])
        print('url_thumb:', url_thumb)
        id = re.find('videos/([0-9a-zA-Z_-]+)', url, err='no video id')
        url_data = urljoin(url, '/api/video/{}'.format(id))
        s_json = read_html(url_data, url, session=session)
        data = json.loads(s_json)
        video = data[0]
        url_video = urljoin(url, video['uri'])
        file = File(type, url_video, title, url)
        file.url_thumb = url_thumb
        file.thumb = BytesIO()
        downloader.download(url_thumb, buffer=file.thumb, referer=url)
        files.append(file)
    else:
        raise NotImplementedError(type)
    return files

def get_video(session, id, cw=None):
    print_ = get_print(cw)
    try:
        info = nndownload.request_video(session, id)
    except:
        raise Exception('Err')
    video = Video(session, info)
    return video

def get_imgs_more(username, session, title, types, n=None, format='[%y-%m-%d] id_ppage',
                  cw=None, mode='media', method='tab', imgs=None):
    print_ = get_print(cw)
    imgs = imgs or []
    print_('imgs: {}, types: {}'.format(len(imgs), ', '.join(types)))

    artist, username = get_artist_username(username, session)

    # Range
    n = max(n or 0, get_max_range(cw))

    ids_set = set(img.id for img in imgs)
    count_no_imgs = 0
    filter_ = '' if options.get('experimental') else ' filter:media' #2687
    while len(imgs) < n:
        if ids_set:
            max_id = min(ids_set) - 1
            q = 'from:{} max_id:{} exclude:retweets{} -filter:periscope'.format(username, max_id, filter_)
        else:
            q = 'from:{} exclude:retweets{} -filter:periscope'.format(username, filter_)
        print(q)

        tweets = []
        for tweet in list(TwitterAPI(session, cw).search(q)):
            id = int(tweet['id'])
            if id in ids_set:
                print_('duplicate: {}'.format(id))
                continue
            ids_set.add(id)
            tweets.append(tweet)

        if tweets:
            count_no_imgs = 0
        else:
            count_no_imgs += 1
            change_ua(session)
            if count_no_imgs >= 3:
                break
            print_('retry...')
            continue

        for tweet in tweets:
            imgs += get_imgs_from_tweet(tweet, session, types, format, cw)

        msg = '{} {} (@{}) - {}'.format(tr_('읽는 중...'), artist, username, len(imgs))
        if cw and not cw.alive:
            break
        if cw:
            cw.setTitle(msg)
        else:
            print(msg)

    return imgs

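# --- illustration (not part of the extractor) ---
# get_imgs_more() pages backwards: each query passes max_id = (smallest id
# seen) - 1, so the search only returns strictly older tweets, and a run of
# empty or duplicate-only batches ends the walk. The shape in isolation
# (a sketch; `fetch` stands in for TwitterAPI.search and accepts None for
# "newest first"):
def walk_backwards_sketch(fetch, limit):
    seen = set()
    items = []
    max_id = None
    while len(items) < limit:
        batch = [b for b in fetch(max_id) if b['id'] not in seen]
        if not batch:
            break  # only duplicates or nothing older; stop
        for b in batch:
            seen.add(b['id'])
            items.append(b)
        max_id = min(seen) - 1  # next page: strictly older items
    return items[:limit]
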
def get_imgs(page, cw=None):
    print_ = get_print(cw)
    html = downloader.read_html(page.url)
    soup = Soup(html)

    type_ = re.find('''webtoonType *: *['"](.+?)['"]''', html)
    print_('type: {}'.format(type_))

    imgs = []
    if type_ == 'DEFAULT': # https://m.comic.naver.com/webtoon/detail.nhn?titleId=715772
        view = soup.find('div', class_='toon_view_lst')
        for img in view.findAll('img'):
            img = img.attrs.get('data-src')
            if not img:
                continue
            img = urljoin(page.url, img)
            img = Image(img, page, len(imgs))
            imgs.append(img)
    elif type_ == 'CUTTOON': # https://m.comic.naver.com/webtoon/detail.nhn?titleId=752803
        view = soup.find('div', class_='swiper-wrapper')
        for div in view.findAll('div', class_='swiper-slide'):
            if div.parent != view:
                continue
            if div.find('div', class_='cut_viewer_last'):
                print('cut_viewer_last')
                continue
            if div.find('div', class_='cut_viewer_recomm'):
                print('cut_viewer_recomm')
                continue
            img = div.find('img')
            img = img.attrs['data-src']
            img = urljoin(page.url, img)
            img = Image(img, page, len(imgs))
            imgs.append(img)
    elif type_ == 'EFFECTTOON': #2313; https://m.comic.naver.com/webtoon/detail.nhn?titleId=670144
        img_base = re.find('''imageUrl *: *['"](.+?)['"]''', html) + '/'
        print('img_base:', img_base)
        url_api = re.find('''documentUrl *: *['"](.+?)['"]''', html)
        data_raw = downloader.read_html(url_api, page.url)
        data = json.loads(data_raw)
        for img in data['assets']['stillcut'].values(): # ordered in python3.7+
            img = urljoin(img_base, img)
            img = Image(img, page, len(imgs))
            imgs.append(img)
    else:
        _imgs = re.findall('sImageUrl *: *[\'"](.+?)[\'"]', html)
        if not _imgs:
            raise Exception('no imgs')
        for img in _imgs:
            img = urljoin(page.url, img)
            img = Image(img, page, len(imgs))
            imgs.append(img)

    return imgs

def get_imgs(page, session, cw):
    print_ = get_print(cw)

    if not downloader.cookiejar.get('PROF', domain='.daum.net') and page.serviceType != 'free': #3314
        raise NotPaidError()

    html = downloader.read_html(page.url, session=session)
    header, id = get_id(page.url)
    t = int(time())
    soup = Soup(html)

    type_ = header_to_type(header)
    url_data = 'http://webtoon.daum.net/data/pc/{}/viewer/{}?timeStamp={}'.format(type_, id, t)
    data_raw = downloader.read_html(url_data, session=session, referer=page.url)
    data = json.loads(data_raw)

    if header == 'league_':
        m_type = None
    else:
        m_type = data['data']['webtoonEpisode']['multiType']
    print_('m_type: {}'.format(m_type))

    if m_type == 'chatting':
        page.url = page.url.replace('daum.net/', 'daum.net/m/')
        url_data = 'http://webtoon.daum.net/data/mobile/{}/viewer?id={}&{}'.format(type_, id, t)
        data_raw = downloader.read_html(url_data, session=session, referer=page.url)
        data = json.loads(data_raw)
        imgs = []
        for chat in data['data']['webtoonEpisodeChattings']:
            img = chat.get('image')
            if not img:
                continue
            img = Image(img['url'], page, len(imgs))
            imgs.append(img)
    else:
        url_data = 'http://webtoon.daum.net/data/pc/{}/viewer_images/{}?timeStamp={}'.format(type_, id, t)
        data_raw = downloader.read_html(url_data, session=session, referer=page.url)
        data = json.loads(data_raw)
        if not data.get('data'):
            raise NotPaidError()
        imgs = []
        for img in data['data']:
            img = Image(img['url'], page, len(imgs))
            imgs.append(img)

    return imgs

def check_error(soup, cw, wait):
    print_ = get_print(cw)

    err = soup.find('div', class_='error-container')
    if err:
        err = err.text.strip()
        if wait:
            print_('err: {}'.format(err))
            sleep(60*30, cw)  # wait 30 minutes, then let the caller retry
        else:
            raise Exception(err)

def print_streams(streams, cw):
    print_ = get_print(cw)

    for stream in streams:
        format = stream.format
        print_(u'[{}][{}fps][{}] {} {} ─ {}'.format(
            stream.resolution, stream.fps, stream.abr, stream.subtype,
            stream.audio_codec, format))
    print_('')

def real_url(url, session=None, cw=None):
    print_ = get_print(cw)
    if session is None:
        session = Session()
    data = clf2.solve(url, session=session, cw=cw)
    url_new = data['url']
    print('url_new:', url_new)
    if url_new != url:
        # keep the original path on the resolved domain
        url_new = urljoin(url_new, '/' + u'/'.join(url.split('/')[3:]))
        print_(u'[redirect domain] {} -> {}'.format(url, url_new))
    return url_new

def get_title(soup, cw=None):
    print_ = get_print(cw)
    for h1 in soup.findAll('h1'):
        title = h1.text.strip()
        if title:
            break
    else:
        raise Exception('no title')
    title_clean = clean_title(title)
    print_('get_title: "{}"({}) "{}"({})'.format(
        title, title.encode('utf8'), title_clean, title_clean.encode('utf8')))
    return title_clean

def read(self):
    ui_setting = self.ui_setting
    cw = self.customWidget
    print_ = get_print(cw)
    if self.yt_type == 'video':
        res = get_resolution()
        info = get_videos(self.url, type=self.yt_type, max_res=res, only_mp4=False, audio_included=False, cw=cw)
    else:
        abr = get_abr()
        info = get_videos(self.url, type=self.yt_type, max_abr=abr, cw=cw)
    videos = info['videos']

    cw.enableSegment(overwrite=True)

    # first video must be valid
    while videos:
        video = videos[0]
        try:
            video.url()
            break
        except Exception as e:
            print(e)
            videos.remove(video)
    else:
        raise Exception('No videos')

    if len(videos) > 1:
        p2f = get_p2f(cw)
        if p2f:
            self.single = False
            self.title = clean_title(info['title'])
            self.urls = [video.url for video in videos]
            video = videos[0]
            self.setIcon(video.thumb)
            return
        else:
            video = videos.pop(0)
            cw.gal_num = cw.url = video.url._url
            if videos and cw.alive:
                s = u', '.join(video.url._url for video in videos)
                self.exec_queue.put(([s, {'youtube': cw.format}], 'downButton(cw[0], format_selector=cw[1])'))

    self.urls.append(video.url)
    self.artist = video.username
    self.setIcon(video.thumb)
    self.title = video.title

def get_imgs_from_tweet(tweet, session, types, format, cw=None):
    print_ = get_print(cw)
    id = tweet['id_str']

    if 'extended_entities' not in tweet:
        tweet['extended_entities'] = {'media': []}

    for url_ in tweet['entities'].get('urls', []):
        url_ = url_['expanded_url']
        if '//twitpic.com/' in url_:
            print_('twitpic: {}'.format(url_))
            try:
                url_ = get_twitpic(url_, session)
                tweet['extended_entities']['media'].append(
                    {'type': 'photo', 'media_url': url_, 'expanded_url': 'https://twitter.com'})
            except Exception as e:
                print_('Invalid twitpic')
                print_(print_error(e)[-1])

    media = tweet['extended_entities']['media']

    time = get_time(tweet)

    imgs = []
    for m in media:
        type_ = m['type']
        if type_ == 'photo':
            type_ = 'img'
        elif type_ == 'animated_gif':
            type_ = 'video'
        if type_ not in types:
            continue
        if type_ == 'video':
            url_media = sorted(m['video_info']['variants'], key=lambda x: x.get('bitrate', 0))[-1]['url']
        elif type_ == 'img':
            url_media = m['media_url']
            if ':' not in os.path.basename(url_media):
                url_media += ':orig'  # request the original-size image
        else:
            raise NotImplementedError('unknown type')
        url = m['expanded_url']
        img = Image(url_media, url, id, time, len(imgs), format, cw, type_ == 'video')
        imgs.append(img)

    return imgs

def fix_soup(soup, url, session=None, cw=None):
    '''
    fix_soup
    '''
    print_ = get_print(cw)
    if soup.find('div', class_='logo'):
        return soup
    print_('invalid soup: {}'.format(url))

    res = clf2.solve(url, session=session, cw=cw)
    return Soup(res['html'])

def get_video(session, url, format, cw=None):
    print_ = get_print(cw)

    id = get_id(url)
    if 'live.nico' in url: #3986
        raise NotImplementedError('nama')
        #info = nndownload.request_nama(session, id)
    else:
        info = nndownload.request_video(session, id)

    video = Video(session, info, format, cw)
    return video

def __init__(self, url, format=u'[%y-%m-%d] id_ppage', session=None, cw=None, media=None):
    print('Node', url)
    print_ = get_print(cw)
    self.id = re.search(FORMAT_PIN, url).groups()[0]
    self.imgs = []
    self.session = session
    if not media:
        if False: # Original
            j = get_sd(url, self.session, cw=cw)
            data = j['entry_data']['PostPage'][0]['graphql']
        else:
            variables = {
                "shortcode": self.id,
                "child_comment_count": 3,
                "fetch_comment_count": 40,
                "parent_comment_count": 24,
                "has_threaded_comments": True,
            }
            j = get_query('a9441f24ac73000fa17fe6e6da11d59d', variables, session, cw)
            data = j['data']
        media = data['shortcode_media']

    if 'video_url' in media:
        urls = [media['video_url']]
    elif 'edge_sidecar_to_children' in media:
        edges = media['edge_sidecar_to_children']['edges']
        urls = []
        for edge in edges:
            node = edge['node']
            if 'video_url' in node:
                url_ = node['video_url']
            else:
                url_ = node['display_resources'][-1]['src']
            urls.append(url_)
    else:
        urls = [media['display_resources'][-1]['src']]

    time = media['taken_at_timestamp']
    self.date = datetime.fromtimestamp(time)
    self.timeStamp = self.date.strftime(format).replace(':', u'\uff1a')  # fullwidth colon; legal in filenames

    for p, img in enumerate(urls):
        ext = os.path.splitext(img.split('?')[0].split('#')[0])[1]
        filename = '{}{}'.format(self.timeStamp, ext).replace('id', str(self.id)).replace('page', str(p))
        img = Image(img, url, filename)
        self.imgs.append(img)

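# --- illustration (not part of the extractor) ---
# Node.__init__ above turns a strftime pattern like '[%y-%m-%d] id_ppage'
# into a filename: format the timestamp, swap ':' for the fullwidth '：'
# (legal on Windows), then substitute the literal 'id' and 'page' tokens.
# The same steps in isolation (hypothetical helper name):
from datetime import datetime

def make_filename_sketch(fmt, timestamp, id_, page, ext):
    stamp = datetime.fromtimestamp(timestamp).strftime(fmt).replace(':', u'\uff1a')
    return (stamp + ext).replace('id', str(id_)).replace('page', str(page))

# make_filename_sketch('[%y-%m-%d] id_ppage', 0, 42, 0, '.jpg')
# -> '[70-01-01] 42_p0.jpg' (date depends on the local timezone)
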
def get_imgs_channel(url, html=None, cw=None):
    print_ = get_print(cw)
    if html is None:
        html = downloader.read_html(url)
    info = get_info(url, html)

    # Range
    max_pid = get_max_range(cw)

    ids = set()
    imgs = []
    for p in range(1000):
        url_api = 'https://bcy.net/apiv3/user/selfPosts?uid={}'.format(info['uid'])
        if imgs:
            url_api += '&since={}'.format(imgs[-1].id)
        data_raw = downloader.read_html(url_api, url)
        data = json.loads(data_raw)['data']
        items = data['items']
        if not items:
            print('no items')
            break
        c = 0
        for item in items:
            check_alive(cw)
            id = item['item_detail']['item_id']
            if id in ids:
                print('duplicate')
                continue
            c += 1
            ids.add(id)
            url_single = u'https://bcy.net/item/detail/{}'.format(id)
            imgs_single = get_imgs(url_single, cw=cw)
            print_(str(id))
            for p, img in enumerate(imgs_single):
                img = Image(img._url, url_single, id, p)
                imgs.append(img)
            s = u'{} {} - {}'.format(tr_(u'읽는 중...'), info['artist'], min(len(imgs), max_pid))
            if cw:
                cw.setTitle(s)
            else:
                print(s)
            if len(imgs) >= max_pid:
                break
        if not c:
            print('not c')
            break
        if len(imgs) >= max_pid:
            print('over max_pid:', max_pid)
            break
    return imgs[:max_pid]

def get_video(url, session, cw, ie_key=None):
    print_ = get_print(cw)
    try:
        video = _get_video(url, session, cw, ie_key, allow_m3u8=True)
        if isinstance(video.url(), M3u8_stream):
            c = video.url().segs[0].download(cw)
            if not c:
                raise Exception('invalid m3u8')
        return video
    except Exception as e:
        print_(e)
        return _get_video(url, session, cw, ie_key, allow_m3u8=False)

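# --- illustration (not part of the extractor) ---
# get_video() above is a probe-then-fallback: try the m3u8-capable path,
# validate it cheaply (download one segment), and only on failure re-extract
# with allow_m3u8=False. The pattern in isolation (all names hypothetical):
def probe_then_fallback_sketch(make_primary, probe, make_fallback):
    try:
        result = make_primary()
        if not probe(result):
            raise ValueError('probe failed')
        return result
    except Exception:
        return make_fallback()  # degrade to the safer variant
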
def get(self, _):
    print_ = get_print(self.cw)
    url = self._url
    ext = get_ext(url)
    if ext.lower() == '.gif':
        print_('get_ext: {}, {}'.format(self.id_, url))
        try:
            ext = downloader.get_ext(url)
        except Exception as e: #3235
            print_('Err: {}, {}\n'.format(self.id_, url) + print_error(e)[0])
    self.filename = '{}_p{}{}'.format(self.id_, self.p, ext)
    return url

def get_imgs(url, title=None, cw=None):
    print_ = get_print(cw)
    url = clean_url(url)
    id = get_id(url)
    url = u'https://nijie.info/members_illust.php?id={}'.format(id)

    # Range
    max_pid = get_max_range(cw)

    imgs = []
    url_imgs = set()
    for p in range(1, 1 + 100):
        url = setPage(url, p)
        print_(url)
        html = downloader.read_html(url)

        soup = Soup(html)
        posts = soup.findAll('div', class_='nijie')
        if not posts:
            print('no posts')
            break
        c = 0
        for post in posts:
            url_img = urljoin(url, post.a.attrs['href'])
            if url_img in url_imgs:
                print('duplicate:', url_img)
                continue
            url_imgs.add(url_img)
            id = int(re.find('[?&]id=([0-9]+)', url_img))
            multi = post.find('div', class_='thumbnail-icon')
            if multi:
                imgs_ = get_imgs_post(id, url_img)  # multi-page post
            else:
                imgs_ = [Image(id, url_img, 0)]
            imgs += imgs_
            c += 1
            if len(imgs) >= max_pid:
                break

        msg = u'{} {} - {}'.format(tr_(u'읽는 중...'), title, len(imgs))
        if cw:
            if not cw.alive:
                return
            cw.setTitle(msg)
        else:
            print(msg)

        if len(imgs) >= max_pid or c == 0:
            break

    return imgs

def read_channel(url_page, cw=None):
    print_ = get_print(cw)
    res = re.find(CHANNEL_PATTERN, url_page)
    if res is None:
        raise Exception('Not channel')
    header, username = res
    print(header, username)
    max_pid = get_max_range(cw)
    info = {}
    info['header'] = header
    info['username'] = username

    session = Session()
    urls = []
    ids = set()
    for p in range(100):
        url_api = urljoin(url_page, '/{}/{}/videos/best/{}'.format(header, username, p))
        print_(url_api)
        r = session.post(url_api)
        data = json.loads(r.text)

        videos = data.get('videos') #4530
        if not videos:
            print_('empty')
            break

        for video in videos:
            id_ = video['id']
            if id_ in ids:
                print_('duplicate: {}'.format(id_))
                continue
            ids.add(id_)
            info['name'] = video['pn']
            urls.append(urljoin(url_page, video['u']))

        if len(urls) >= max_pid:
            break

        n = data['nb_videos']
        s = '{} {} - {}'.format(tr_('읽는 중...'), info['name'], len(urls))
        if cw:
            cw.setTitle(s)
        else:
            print(s)
        if len(ids) >= n:
            break
        sleep(1, cw)
    if not urls:
        raise Exception('no videos')
    info['urls'] = urls[:max_pid]
    return info

def get(self, _):
    print_ = get_print(self.cw)
    url = self._url
    ext = get_ext(url)
    if ext.lower()[1:] not in ['jpg', 'png', 'mp4']: #4645
        print_('get_ext: {}, {}'.format(self.id_, url))
        try:
            ext = downloader.get_ext(url, referer=_)
        except Exception as e: #3235
            print_('Err: {}, {}\n'.format(self.id_, url) + print_error(e)[0])
    self.filename = '{}_p{}{}'.format(self.id_, self.p, ext)
    return url

def get(self, url):
    print_ = get_print(self.cw)
    if self._url:
        return self._url
    info = self.info

##    ydl = ytdl.YoutubeDL()
##    info = ydl.extract_info(url)

    formats = info['formats']
    print(formats)
    formats = sorted(formats, key=lambda x: int(x.get('abr', 0)), reverse=True)
    url_audio = None

    for format in formats:
        protocol = format['protocol']
        print_(u'【{}】 format【{}】 abr【{}】'.format(protocol, format['format'], format.get('abr', 0)))
        if not url_audio and protocol in ['http', 'https']:
            url_audio = format['url']

    if not url_audio:
        url_audio = M3u8_stream(formats[0]['url'])

    self.album_art = False
    self.username = info['uploader']
    self.title = u'{} - {}'.format(self.username, info['title'])
    self.filename = u'{}{}'.format(clean_title(self.title, allow_dot=True, n=-4), '.mp3')

    thumb = None
    for t in info['thumbnails'][::-1]:
        width = t.get('width', 1080)
        if not 100 <= width <= 500:
            continue  # skip thumbnails that are too small or too large
        url_thumb = t['url']
        thumb = BytesIO()
        try:
            downloader.download(url_thumb, buffer=thumb)
            break
        except Exception as e:
            print(e)
            thumb = None
    self.thumb = thumb
    self._url = url_audio
    return self._url

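# --- illustration (not part of the extractor) ---
# get() above sorts formats by abr (highest first) and takes the first one
# served over plain http/https, falling back to wrapping the top format in an
# HLS reader. The selection logic in isolation (a sketch; the dicts mimic
# ytdl-style format entries):
def pick_audio_url_sketch(formats):
    formats = sorted(formats, key=lambda f: int(f.get('abr', 0)), reverse=True)
    for f in formats:
        if f.get('protocol') in ('http', 'https'):
            return f['url'], False  # direct download
    return formats[0]['url'], True  # caller should wrap this in an m3u8 stream

# pick_audio_url_sketch([
#     {'protocol': 'm3u8', 'abr': 256, 'url': 'hls'},
#     {'protocol': 'https', 'abr': 128, 'url': 'direct'},
# ]) -> ('direct', False)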