def sort_formats(info):
    sorted_formats = info['formats'].copy()
    sorted_formats.sort(key=lambda x: util.default_multi_get(
        _formats, x['format_id'], 'height', default=0))

    # Find the first format whose height is at least 360
    for index, format in enumerate(sorted_formats):
        if util.default_multi_get(
                _formats, format['format_id'], 'height', default=0) >= 360:
            break

    # Rotate so that format comes first: 360p is the default quality, higher
    # resolutions follow in ascending order, and sub-360p fallbacks go last
    sorted_formats = sorted_formats[index:] + sorted_formats[0:index]

    # Keep only formats with both audio and video. Filter the sorted list,
    # not info['formats'], so the ordering above is not discarded.
    sorted_formats = [
        format for format in sorted_formats
        if format['acodec'] != 'none' and format['vcodec'] != 'none'
    ]

    return sorted_formats
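# Illustrative sketch, not called by the module: the rotation in sort_formats
# moves the first entry with height >= 360 to the front while preserving the
# ascending order of everything else. Plain ints stand in for the real format
# dicts here.
def _example_rotate_formats(heights, threshold=360):
    index = 0
    for index, height in enumerate(heights):
        if height >= threshold:
            break
    return heights[index:] + heights[:index]

# _example_rotate_formats([144, 240, 360, 480, 720]) == [360, 480, 720, 144, 240]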
def get_channel_page(env, start_response):
    path_parts = env['path_parts']
    channel_id = path_parts[1]
    try:
        tab = path_parts[2]
    except IndexError:
        tab = 'videos'

    parameters = env['parameters']
    page_number = int(util.default_multi_get(parameters, 'page', 0, default='1'))
    sort = util.default_multi_get(parameters, 'sort', 0, default='3')
    view = util.default_multi_get(parameters, 'view', 0, default='1')
    query = util.default_multi_get(parameters, 'query', 0, default='')

    if tab == 'videos':
        tasks = (
            gevent.spawn(get_number_of_videos, channel_id),
            gevent.spawn(get_channel_tab, channel_id, page_number, sort,
                         'videos', view)
        )
        gevent.joinall(tasks)
        number_of_videos, polymer_json = tasks[0].value, tasks[1].value
        result = channel_videos_html(polymer_json, page_number, sort,
                                     number_of_videos, env['QUERY_STRING'])
    elif tab == 'about':
        polymer_json = util.fetch_url(
            'https://www.youtube.com/channel/' + channel_id + '/about?pbj=1',
            util.desktop_ua + headers_1)
        polymer_json = json.loads(polymer_json)
        result = channel_about_page(polymer_json)
    elif tab == 'playlists':
        polymer_json = util.fetch_url(
            'https://www.youtube.com/channel/' + channel_id
            + '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort],
            util.desktop_ua + headers_1)
        '''with open('debug/channel_playlists_debug', 'wb') as f:
            f.write(polymer_json)'''
        polymer_json = json.loads(polymer_json)
        result = channel_playlists_html(polymer_json, sort)
    elif tab == 'search':
        tasks = (
            gevent.spawn(get_number_of_videos, channel_id),
            gevent.spawn(get_channel_search_json, channel_id, query,
                         page_number)
        )
        gevent.joinall(tasks)
        number_of_videos, polymer_json = tasks[0].value, tasks[1].value
        result = channel_search_page(polymer_json, query, page_number,
                                     number_of_videos, env['QUERY_STRING'])
    else:
        start_response('404 Not Found', [('Content-type', 'text/plain'),])
        return b'Unknown channel tab: ' + tab.encode('utf-8')

    start_response('200 OK', [('Content-type', 'text/html'),])
    return result.encode('utf-8')
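# Hedged usage sketch: get_channel_page is a WSGI-style handler, and the
# dispatcher (outside this section) is assumed to fill in env roughly like
# this before calling it (channel id hypothetical):
#
#   env['path_parts'] = ['channel', 'UCxxxxxxxxxxxxxxxxxxxxxx', 'videos']
#   env['parameters'] = urllib.parse.parse_qs(env['QUERY_STRING'])
#
# parse_qs wraps every value in a list ('page=2' -> {'page': ['2']}), which
# is why scalars are read as default_multi_get(parameters, 'page', 0, ...).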
def get_post_comment_page(env, start_response):
    start_response('200 OK', [
        ('Content-type', 'text/html'),
    ])

    parameters = env['parameters']
    video_id = parameters['video_id'][0]
    parent_id = util.default_multi_get(parameters, 'parent_id', 0, default='')

    style = ''' main{
    display: grid;
    grid-template-columns: 3fr 2fr;
}
.left{
    display:grid;
    grid-template-columns: 1fr 640px;
}
textarea{
    width: 460px;
    height: 85px;
}
.comment-form{
    grid-column:2;
    justify-content:start;
}'''

    if parent_id:  # comment reply
        comment_box = comments.comment_box_template.substitute(
            form_action=util.URL_ORIGIN + '/comments?parent_id=' + parent_id
                + "&video_id=" + video_id,
            video_id_input='',
            post_text="Post reply",
            options=comments.comment_box_account_options(),
        )
    else:
        comment_box = comments.comment_box_template.substitute(
            form_action=util.URL_ORIGIN + '/post_comment',
            video_id_input='''<input type="hidden" name="video_id" value="'''
                + video_id + '''">''',
            post_text="Post comment",
            options=comments.comment_box_account_options(),
        )

    page = '''<div class="left">\n''' + comment_box + '''</div>\n'''

    return html_common.yt_basic_template.substitute(
        page_title="Post comment reply" if parent_id else "Post a comment",
        style=style,
        header=html_common.get_header(),
        page=page,
    ).encode('utf-8')
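# For illustration, the two branches above bind the same template differently
# (ids hypothetical):
#
#   reply form:     action = URL_ORIGIN + '/comments?parent_id=<parent>&video_id=<video>'
#                   no hidden input, since both ids are already in the URL
#   top-level form: action = URL_ORIGIN + '/post_comment'
#                   a hidden <input name="video_id"> carries the video id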
def parse_comments_ajax(content, replies=False):
    try:
        content = json.loads(util.uppercase_escape(content.decode('utf-8')))
        #print(content)
        comments_raw = content['content']['continuation_contents']['contents']
        ctoken = util.default_multi_get(content, 'content',
            'continuation_contents', 'continuations', 0, 'continuation',
            default='')

        comments = []
        for comment_raw in comments_raw:
            replies_url = ''
            if not replies:
                if comment_raw['replies'] is not None:
                    reply_ctoken = comment_raw['replies']['continuations'][0][
                        'continuation']
                    comment_id, video_id = get_ids(reply_ctoken)
                    replies_url = util.URL_ORIGIN + '/comments?parent_id=' \
                        + comment_id + "&video_id=" + video_id
                comment_raw = comment_raw['comment']
            comment = {
                'author': comment_raw['author']['runs'][0]['text'],
                'author_url': comment_raw['author_endpoint']['url'],
                'author_channel_id': '',
                'author_id': '',
                'author_avatar': comment_raw['author_thumbnail']['url'],
                'likes': comment_raw['like_count'],
                'published': comment_raw['published_time']['runs'][0]['text'],
                'text': comment_raw['content']['runs'],
                'reply_count': '',
                'replies_url': replies_url,
            }
            comments.append(comment)
    except Exception as e:
        print('Error parsing comments: ' + str(e))
        comments = ()
        ctoken = ''

    return {'ctoken': ctoken, 'comments': comments}
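# Shape of the value returned by parse_comments_ajax, reconstructed from the
# dict built above; field values here are made up for illustration:
#
#   {
#       'ctoken': '...',       # continuation token, '' on parse failure
#       'comments': [{
#           'author': 'Example User',
#           'author_url': '/user/example',
#           'likes': 5,
#           'published': '2 years ago',
#           'text': [{'text': 'example comment'}],
#           'replies_url': '/comments?parent_id=...&video_id=...',
#           ...                # remaining keys as listed in the dict above
#       }],
#   }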
def get_watch_page(env, start_response):
    video_id = env['parameters']['v'][0]
    if len(video_id) < 11:
        start_response('404 Not Found', [
            ('Content-type', 'text/plain'),
        ])
        return b'Incomplete video id (too short): ' + video_id.encode('ascii')

    start_response('200 OK', [
        ('Content-type', 'text/html'),
    ])

    lc = util.default_multi_get(env['parameters'], 'lc', 0, default='')
    if settings.route_tor:
        proxy = 'socks5://127.0.0.1:9150/'
    else:
        proxy = ''
    downloader = YoutubeDL(params={
        'youtube_include_dash_manifest': False,
        'proxy': proxy,
    })
    tasks = (
        gevent.spawn(comments.video_comments, video_id,
                     int(settings.default_comment_sorting), lc=lc),
        gevent.spawn(extract_info, downloader,
                     "https://www.youtube.com/watch?v=" + video_id,
                     download=False)
    )
    gevent.joinall(tasks)
    comments_html, info = tasks[0].value, tasks[1].value

    #comments_html = comments.comments_html(video_id(url))
    #info = YoutubeDL().extract_info(url, download=False)
    #chosen_format = choose_format(info)

    if isinstance(info, str):  # youtube error
        return html_common.yt_basic_template.substitute(
            page_title="Error",
            style="",
            header=html_common.get_header(),
            page=html.escape(info),
        ).encode('utf-8')

    sorted_formats = sort_formats(info)

    video_info = {
        "duration": util.seconds_to_timestamp(info["duration"]),
        "id": info['id'],
        "title": info['title'],
        "author": info['uploader'],
    }

    upload_year = info["upload_date"][0:4]
    upload_month = info["upload_date"][4:6]
    upload_day = info["upload_date"][6:8]
    upload_date = upload_month + "/" + upload_day + "/" + upload_year

    if settings.enable_related_videos:
        related_videos_html = get_related_items_html(info)
    else:
        related_videos_html = ''

    music_list = info['music_list']
    if len(music_list) == 0:
        music_list_html = ''
    else:
        # get the set of attributes which are used by at least 1 track, so
        # there isn't an empty, extraneous album column which no tracks use,
        # for example
        used_attributes = set()
        for track in music_list:
            used_attributes = used_attributes | track.keys()

        # now put them in the right order
        ordered_attributes = []
        for attribute in ('Artist', 'Title', 'Album'):
            if attribute.lower() in used_attributes:
                ordered_attributes.append(attribute)

        music_list_html = '''<hr>
<table>
<caption>Music</caption>
<tr>
'''
        # table headings
        for attribute in ordered_attributes:
            music_list_html += "<th>" + attribute + "</th>\n"
        music_list_html += '''</tr>\n'''

        for track in music_list:
            music_list_html += '''<tr>\n'''
            for attribute in ordered_attributes:
                try:
                    value = track[attribute.lower()]
                except KeyError:
                    music_list_html += '''<td></td>'''
                else:
                    music_list_html += '''<td>''' + html.escape(value) + '''</td>'''
            music_list_html += '''</tr>\n'''
        music_list_html += '''</table>\n'''

    if settings.gather_googlevideo_domains:
        with open(os.path.join(settings.data_dir, 'googlevideo-domains.txt'),
                  'a+', encoding='utf-8') as f:
            url = info['formats'][0]['url']
            subdomain = url[0:url.find(".googlevideo.com")]
            f.write(subdomain + "\n")

    download_options = ''
    for format in info['formats']:
        download_options += download_link_template.substitute(
            url=html.escape(format['url']),
            ext=html.escape(format['ext']),
            resolution=html.escape(downloader.format_resolution(format)),
            note=html.escape(downloader._format_note(format)),
        )

    page = yt_watch_template.substitute(
        video_title=html.escape(info["title"]),
        page_title=html.escape(info["title"]),
        header=html_common.get_header(),
        uploader=html.escape(info["uploader"]),
        uploader_channel_url='/' + info["uploader_url"],
        upload_date=upload_date,
        views=(lambda x: '{:,}'.format(x) if x is not None else "")(info.get("view_count", None)),
        likes=(lambda x: '{:,}'.format(x) if x is not None else "")(info.get("like_count", None)),
        dislikes=(lambda x: '{:,}'.format(x) if x is not None else "")(info.get("dislike_count", None)),
        download_options=download_options,
        video_info=html.escape(json.dumps(video_info)),
        description=html.escape(info["description"]),
        video_sources=formats_html(sorted_formats) + subtitles_html(info),
        related=related_videos_html,
        comments=comments_html,
        music_list=music_list_html,
        is_unlisted='<span class="is-unlisted">Unlisted</span>' if info['unlisted'] else '',
    )
    return page.encode('utf-8')
def get_comments_page(env, start_response):
    start_response('200 OK', [
        ('Content-type', 'text/html'),
    ])
    parameters = env['parameters']
    ctoken = util.default_multi_get(parameters, 'ctoken', 0, default='')
    replies = False
    if not ctoken:
        video_id = parameters['video_id'][0]
        parent_id = parameters['parent_id'][0]
        ctoken = comment_replies_ctoken(video_id, parent_id)
        replies = True

    comment_info = parse_comments_polymer(request_comments(ctoken, replies),
                                          replies)

    metadata = ctoken_metadata(ctoken)
    if replies:
        page_title = 'Replies'
        video_metadata = ''
        comment_box = comment_box_template.substitute(
            form_action='',
            video_id_input='',
            post_text='Post reply',
            options=comment_box_account_options(),
        )
        comment_links = ''
    else:
        page_number = str(int(metadata['offset'] / 20) + 1)
        page_title = 'Comments page ' + page_number

        video_metadata = video_metadata_template.substitute(
            page_number=page_number,
            sort='top' if metadata['sort'] == 0 else 'newest',
            title=html.escape(comment_info['video_title']),
            url=util.URL_ORIGIN + '/watch?v=' + metadata['video_id'],
            thumbnail='/i.ytimg.com/vi/' + metadata['video_id']
                + '/mqdefault.jpg',
        )
        comment_box = comment_box_template.substitute(
            form_action=util.URL_ORIGIN + '/post_comment',
            video_id_input='''<input type="hidden" name="video_id" value="'''
                + metadata['video_id'] + '''">''',
            post_text='Post comment',
            options=comment_box_account_options(),
        )

        other_sort_url = util.URL_ORIGIN + '/comments?ctoken=' \
            + make_comment_ctoken(metadata['video_id'],
                                  sort=1 - metadata['sort'])
        other_sort_name = 'newest' if metadata['sort'] == 0 else 'top'
        other_sort_link = '''<a class="sort-button" href="''' + other_sort_url \
            + '''">Sort by ''' + other_sort_name + '''</a>'''

        comment_links = '''<div class="comment-links">\n'''
        comment_links += other_sort_link + '\n'
        comment_links += '''</div>'''

    comments_html = get_comments_html(comment_info['comments'])
    ctoken = comment_info['ctoken']
    if ctoken == '':
        more_comments_button = ''
    else:
        more_comments_button = more_comments_template.substitute(
            url=util.URL_ORIGIN + '/comments?ctoken=' + ctoken)

    comments_area = '<section class="comments-area">\n'
    comments_area += video_metadata + comment_box + comment_links + '\n'
    comments_area += '<div class="comments">\n'
    comments_area += comments_html + '\n'
    comments_area += '</div>\n'
    comments_area += more_comments_button + '\n'
    comments_area += '</section>\n'

    return yt_comments_template.substitute(
        header=html_common.get_header(),
        comments_area=comments_area,
        page_title=page_title,
    ).encode('utf-8')
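# The page arithmetic above assumes 20 comments per page, derived from the
# offset encoded in the ctoken:
#
#   metadata['offset']   0 -> 'Comments page 1'
#   metadata['offset']  20 -> 'Comments page 2'
#   metadata['offset']  40 -> 'Comments page 3'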
def parse_comments_polymer(content, replies=False):
    try:
        video_title = ''
        content = json.loads(util.uppercase_escape(content.decode('utf-8')))
        url = content[1]['url']
        ctoken = urllib.parse.parse_qs(url[url.find('?') + 1:])['ctoken'][0]
        video_id = ctoken_metadata(ctoken)['video_id']
        #print(content)
        try:
            comments_raw = content[1]['response']['continuationContents'][
                'commentSectionContinuation']['items']
        except KeyError:
            comments_raw = content[1]['response']['continuationContents'][
                'commentRepliesContinuation']['contents']
            replies = True

        ctoken = util.default_multi_get(content, 1, 'response',
            'continuationContents', 'commentSectionContinuation',
            'continuations', 0, 'nextContinuationData', 'continuation',
            default='')

        comments = []
        for comment_raw in comments_raw:
            replies_url = ''
            view_replies_text = ''
            try:
                comment_raw = comment_raw['commentThreadRenderer']
            except KeyError:
                pass
            else:
                if 'commentTargetTitle' in comment_raw:
                    video_title = comment_raw['commentTargetTitle']['runs'][0][
                        'text']

                parent_id = comment_raw['comment']['commentRenderer'][
                    'commentId']
                # TODO: move this stuff into the comments_html function
                if 'replies' in comment_raw:
                    #reply_ctoken = comment_raw['replies']['commentRepliesRenderer']['continuations'][0]['nextContinuationData']['continuation']
                    #comment_id, video_id = get_ids(reply_ctoken)
                    replies_url = util.URL_ORIGIN + '/comments?parent_id=' \
                        + parent_id + "&video_id=" + video_id
                    view_replies_text = yt_data_extract.get_plain_text(
                        comment_raw['replies']['commentRepliesRenderer']
                        ['moreText'])
                    match = reply_count_regex.search(view_replies_text)
                    if match is None:
                        view_replies_text = '1 reply'
                    else:
                        view_replies_text = match.group(1) + " replies"
                elif not replies:
                    view_replies_text = "Reply"
                    replies_url = util.URL_ORIGIN + '/post_comment?parent_id=' \
                        + parent_id + "&video_id=" + video_id
                comment_raw = comment_raw['comment']

            comment_raw = comment_raw['commentRenderer']
            comment = {
                'author_id': comment_raw.get('authorId', ''),
                'author_avatar': comment_raw['authorThumbnail']['thumbnails'][0]['url'],
                'likes': comment_raw['likeCount'],
                'published': yt_data_extract.get_plain_text(
                    comment_raw['publishedTimeText']),
                'text': comment_raw['contentText'].get('runs', ''),
                'view_replies_text': view_replies_text,
                'replies_url': replies_url,
                'video_id': video_id,
                'comment_id': comment_raw['commentId'],
            }

            if 'authorText' in comment_raw:  # deleted channels have no name or channel link
                comment['author'] = yt_data_extract.get_plain_text(
                    comment_raw['authorText'])
                comment['author_url'] = comment_raw['authorEndpoint'][
                    'commandMetadata']['webCommandMetadata']['url']
                comment['author_channel_id'] = comment_raw['authorEndpoint'][
                    'browseEndpoint']['browseId']
            else:
                comment['author'] = ''
                comment['author_url'] = ''
                comment['author_channel_id'] = ''

            comments.append(comment)
    except Exception as e:
        print('Error parsing comments: ' + str(e))
        comments = ()
        ctoken = ''

    return {'ctoken': ctoken, 'comments': comments, 'video_title': video_title}
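# reply_count_regex is defined elsewhere in the module; it is assumed to
# capture the count out of strings like 'View all 57 replies', with the
# no-match case (e.g. 'View reply') treated as exactly one reply.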
def parse_comments_polymer(content):
    # Defaults so the return statement can't raise a NameError if parsing
    # fails before metadata is extracted from the ctoken; the keys match
    # those used in the return dict below.
    metadata = {'video_id': '', 'offset': 0, 'is_replies': False, 'sort': 0}
    try:
        video_title = ''
        content = json.loads(util.uppercase_escape(content.decode('utf-8')))
        url = content[1]['url']
        ctoken = urllib.parse.parse_qs(url[url.find('?')+1:])['ctoken'][0]
        metadata = ctoken_metadata(ctoken)

        try:
            comments_raw = content[1]['response']['continuationContents'][
                'commentSectionContinuation']['items']
        except KeyError:
            comments_raw = content[1]['response']['continuationContents'][
                'commentRepliesContinuation']['contents']

        ctoken = util.default_multi_get(content, 1, 'response',
            'continuationContents', 'commentSectionContinuation',
            'continuations', 0, 'nextContinuationData', 'continuation',
            default='')

        comments = []
        for comment_json in comments_raw:
            number_of_replies = 0
            try:
                comment_thread = comment_json['commentThreadRenderer']
            except KeyError:
                comment_renderer = comment_json['commentRenderer']
            else:
                if 'commentTargetTitle' in comment_thread:
                    video_title = comment_thread['commentTargetTitle']['runs'][0]['text']

                if 'replies' in comment_thread:
                    view_replies_text = yt_data_extract.get_plain_text(
                        comment_thread['replies']['commentRepliesRenderer']['moreText'])
                    view_replies_text = view_replies_text.replace(',', '')
                    match = re.search(r'(\d+)', view_replies_text)
                    if match is None:
                        number_of_replies = 1
                    else:
                        number_of_replies = int(match.group(1))
                comment_renderer = comment_thread['comment']['commentRenderer']

            comment = {
                'author_id': comment_renderer.get('authorId', ''),
                'author_avatar': comment_renderer['authorThumbnail']['thumbnails'][0]['url'],
                'likes': comment_renderer['likeCount'],
                'published': yt_data_extract.get_plain_text(
                    comment_renderer['publishedTimeText']),
                'text': comment_renderer['contentText'].get('runs', ''),
                'number_of_replies': number_of_replies,
                'comment_id': comment_renderer['commentId'],
            }

            if 'authorText' in comment_renderer:  # deleted channels have no name or channel link
                comment['author'] = yt_data_extract.get_plain_text(
                    comment_renderer['authorText'])
                comment['author_url'] = comment_renderer['authorEndpoint'][
                    'commandMetadata']['webCommandMetadata']['url']
                comment['author_channel_id'] = comment_renderer['authorEndpoint'][
                    'browseEndpoint']['browseId']
            else:
                comment['author'] = ''
                comment['author_url'] = ''
                comment['author_channel_id'] = ''

            comments.append(comment)
    except Exception as e:
        print('Error parsing comments: ' + str(e))
        comments = ()
        ctoken = ''

    return {
        'ctoken': ctoken,
        'comments': comments,
        'video_title': video_title,
        'video_id': metadata['video_id'],
        'offset': metadata['offset'],
        'is_replies': metadata['is_replies'],
        'sort': metadata['sort'],
    }
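# Sketch of the reply-count parsing in the variant above: commas are stripped
# so the \d+ group captures the whole number from YouTube's localized
# moreText (example strings hypothetical):
#
#   'View reply'             -> no digit match -> number_of_replies = 1
#   'View all 1,234 replies' -> '1234'         -> number_of_replies = 1234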