Exemplo n.º 1
0
def sort_formats(info):
    """Return the playable formats of a video in player preference order.

    Only formats carrying both an audio and a video stream are kept
    (``acodec`` and ``vcodec`` both different from ``'none'``).  The kept
    formats are sorted by ascending height, where the height is looked up
    in the module-level ``_formats`` table by ``format_id`` with 0 passed
    as the default.  The list is then rotated so that the lowest format
    with a height of at least 360 comes first, followed by the higher
    ones and finally the lower ones.  If no format reaches 360, the
    tallest available format is moved to the front instead.

    Previously the sorted and rotated list was discarded and the formats
    were returned in their original order; an empty format list raised
    because the loop variable was never bound.

    Args:
        info: dict with a ``'formats'`` list of dicts, each having
            ``'format_id'``, ``'acodec'`` and ``'vcodec'`` keys.

    Returns:
        A new list of format dicts; ``info['formats']`` is not modified.
        Empty when no format has both audio and video.
    """
    def height_of(fmt):
        return util.default_multi_get(
            _formats, fmt['format_id'], 'height', default=0)

    playable = [
        fmt for fmt in info['formats']
        if fmt['acodec'] != 'none' and fmt['vcodec'] != 'none'
    ]
    if not playable:
        return []

    playable.sort(key=height_of)

    # Default to the last (tallest) entry so that, when nothing reaches
    # 360, the best available format still ends up first.
    pivot = len(playable) - 1
    for position, fmt in enumerate(playable):
        if height_of(fmt) >= 360:
            pivot = position
            break

    return playable[pivot:] + playable[:pivot]
Exemplo n.º 2
0
def get_channel_page(env, start_response):
    """WSGI-style handler that renders one tab of a channel page.

    The channel id is taken from ``env['path_parts'][1]`` and the tab from
    ``env['path_parts'][2]`` (``'videos'`` when absent).  The ``page``,
    ``sort``, ``view`` and ``query`` request parameters are read with
    defaults ``'1'``, ``'3'``, ``'1'`` and ``''``.

    Returns the UTF-8 encoded HTML of the tab after calling
    ``start_response`` with ``200 OK``, or a plain-text ``404 Not Found``
    body for an unknown tab.
    """
    segments = env['path_parts']
    channel_id = segments[1]
    tab = segments[2] if len(segments) > 2 else 'videos'

    parameters = env['parameters']
    page_number = int(
        util.default_multi_get(parameters, 'page', 0, default='1'))
    sort = util.default_multi_get(parameters, 'sort', 0, default='3')
    view = util.default_multi_get(parameters, 'view', 0, default='1')
    query = util.default_multi_get(parameters, 'query', 0, default='')

    # Reject unknown tabs up front so the branches below only deal with
    # tabs that can actually be rendered.
    if tab not in ('videos', 'about', 'playlists', 'search'):
        start_response('404 Not Found', [('Content-type', 'text/plain')])
        return b'Unknown channel tab: ' + tab.encode('utf-8')

    if tab == 'videos':
        # The video count and the tab contents are fetched concurrently.
        count_task = gevent.spawn(get_number_of_videos, channel_id)
        tab_task = gevent.spawn(
            get_channel_tab, channel_id, page_number, sort, 'videos', view)
        gevent.joinall((count_task, tab_task))
        number_of_videos = count_task.value
        polymer_json = tab_task.value

        result = channel_videos_html(
            polymer_json, page_number, sort, number_of_videos,
            env['QUERY_STRING'])
    elif tab == 'about':
        raw_json = util.fetch_url(
            'https://www.youtube.com/channel/' + channel_id + '/about?pbj=1',
            util.desktop_ua + headers_1)
        result = channel_about_page(json.loads(raw_json))
    elif tab == 'playlists':
        raw_json = util.fetch_url(
            'https://www.youtube.com/channel/' + channel_id
            + '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort],
            util.desktop_ua + headers_1)
        result = channel_playlists_html(json.loads(raw_json), sort)
    else:  # tab == 'search'
        count_task = gevent.spawn(get_number_of_videos, channel_id)
        search_task = gevent.spawn(
            get_channel_search_json, channel_id, query, page_number)
        gevent.joinall((count_task, search_task))
        number_of_videos = count_task.value
        polymer_json = search_task.value

        result = channel_search_page(
            polymer_json, query, page_number, number_of_videos,
            env['QUERY_STRING'])

    start_response('200 OK', [('Content-type', 'text/html')])
    return result.encode('utf-8')
Exemplo n.º 3
0
def get_post_comment_page(env, start_response):
    """WSGI-style handler that renders the "post a comment / reply" form.

    Reads ``video_id`` (required) and ``parent_id`` (optional) from
    ``env['parameters']``.  When ``parent_id`` is non-empty the form is a
    reply form posting to ``/comments``; otherwise it is a top-level
    comment form posting to ``/post_comment`` with the video id carried in
    a hidden input.

    Both ids come straight from the request, so they are percent-encoded
    before being placed in the form-action URL and HTML-escaped before
    being placed in the hidden input's ``value`` attribute.  Previously
    they were inserted verbatim, which let a crafted parameter inject
    markup into the page.

    Returns:
        The UTF-8 encoded HTML page.  ``start_response`` is called with
        ``200 OK`` before the parameters are read.

    Raises:
        KeyError: if the ``video_id`` parameter is missing.
    """
    # Local imports keep this block self-contained regardless of what the
    # surrounding module already imports.
    import html
    import urllib.parse

    start_response('200 OK', [
        ('Content-type', 'text/html'),
    ])
    parameters = env['parameters']
    video_id = parameters['video_id'][0]
    parent_id = util.default_multi_get(parameters, 'parent_id', 0, default='')

    style = ''' main{
    display: grid;
    grid-template-columns: 3fr 2fr;
}
.left{
    display:grid;
    grid-template-columns: 1fr 640px;
}
textarea{
    width: 460px;
    height: 85px;
}
.comment-form{
    grid-column:2;
    justify-content:start;
}'''
    if parent_id:  # comment reply
        # quote(..., safe='') leaves only unreserved characters and %XX
        # escapes, so the result is safe in both URL and HTML contexts.
        comment_box = comments.comment_box_template.substitute(
            form_action=util.URL_ORIGIN + '/comments?parent_id=' +
            urllib.parse.quote(parent_id, safe='') +
            "&video_id=" + urllib.parse.quote(video_id, safe=''),
            video_id_input='',
            post_text="Post reply",
            options=comments.comment_box_account_options(),
        )
    else:
        comment_box = comments.comment_box_template.substitute(
            form_action=util.URL_ORIGIN + '/post_comment',
            video_id_input='''<input type="hidden" name="video_id" value="''' +
            html.escape(video_id, quote=True) + '''">''',
            post_text="Post comment",
            options=comments.comment_box_account_options(),
        )

    page = '''<div class="left">\n''' + comment_box + '''</div>\n'''
    return html_common.yt_basic_template.substitute(
        page_title="Post comment reply" if parent_id else "Post a comment",
        style=style,
        header=html_common.get_header(),
        page=page,
    ).encode('utf-8')
Exemplo n.º 4
0
def parse_comments_ajax(content, replies=False):
    """Parse a raw comments AJAX response into a list of comment dicts.

    Args:
        content: the response body as bytes (UTF-8 encoded JSON).
        replies: True when ``content`` holds replies to a comment; in that
            case each item is already the comment itself and no replies
            URL is generated.

    Returns:
        ``{'ctoken': str, 'comments': list_of_dicts}``.  Parsing is
        best-effort: any exception is printed and the result falls back to
        an empty ctoken and an empty tuple of comments.
    """
    try:
        content = json.loads(util.uppercase_escape(content.decode('utf-8')))
        continuation = content['content']['continuation_contents']
        comments_raw = continuation['contents']
        ctoken = util.default_multi_get(
            content, 'content', 'continuation_contents', 'continuations', 0,
            'continuation', default='')

        comments = []
        for entry in comments_raw:
            replies_url = ''
            if not replies:
                # Top-level items wrap the comment and may carry a
                # continuation token for their replies.
                thread_replies = entry['replies']
                if thread_replies is not None:
                    reply_ctoken = (
                        thread_replies['continuations'][0]['continuation'])
                    comment_id, video_id = get_ids(reply_ctoken)
                    replies_url = (util.URL_ORIGIN + '/comments?parent_id='
                                   + comment_id + "&video_id=" + video_id)
                entry = entry['comment']

            comments.append({
                'author': entry['author']['runs'][0]['text'],
                'author_url': entry['author_endpoint']['url'],
                'author_channel_id': '',
                'author_id': '',
                'author_avatar': entry['author_thumbnail']['url'],
                'likes': entry['like_count'],
                'published': entry['published_time']['runs'][0]['text'],
                'text': entry['content']['runs'],
                'reply_count': '',
                'replies_url': replies_url,
            })
    except Exception as error:
        print('Error parsing comments: ' + str(error))
        comments = ()
        ctoken = ''

    return {'ctoken': ctoken, 'comments': comments}
Exemplo n.º 5
0
def get_watch_page(env, start_response):
    """WSGI-style handler that renders the watch page for one video.

    The video id is read from the ``v`` request parameter.  An id shorter
    than 11 characters yields a plain-text ``404 Not Found`` response.
    Otherwise the comments and the video info are fetched concurrently and
    substituted into ``yt_watch_template``.  When the info extraction
    returns a string it is treated as an error message and shown on a
    basic error page instead.

    Returns:
        The response body as bytes.
    """
    video_id = env['parameters']['v'][0]
    if len(video_id) < 11:
        start_response('404 Not Found', [('Content-type', 'text/plain')])
        return b'Incomplete video id (too short): ' + video_id.encode('ascii')

    start_response('200 OK', [('Content-type', 'text/html')])

    lc = util.default_multi_get(env['parameters'], 'lc', 0, default='')
    proxy = 'socks5://127.0.0.1:9150/' if settings.route_tor else ''
    downloader = YoutubeDL(params={
        'youtube_include_dash_manifest': False,
        'proxy': proxy,
    })

    # Fetch the comments and the video info at the same time.
    comments_task = gevent.spawn(
        comments.video_comments,
        video_id,
        int(settings.default_comment_sorting),
        lc=lc)
    info_task = gevent.spawn(
        extract_info,
        downloader,
        "https://www.youtube.com/watch?v=" + video_id,
        download=False)
    gevent.joinall((comments_task, info_task))
    comments_html = comments_task.value
    info = info_task.value

    if isinstance(info, str):  # youtube error
        error_page = html_common.yt_basic_template.substitute(
            page_title="Error",
            style="",
            header=html_common.get_header(),
            page=html.escape(info),
        )
        return error_page.encode('utf-8')

    sorted_formats = sort_formats(info)

    video_info = {
        "duration": util.seconds_to_timestamp(info["duration"]),
        "id": info['id'],
        "title": info['title'],
        "author": info['uploader'],
    }

    # upload_date is sliced as YYYYMMDD and shown as MM/DD/YYYY
    raw_date = info["upload_date"]
    upload_date = "/".join((raw_date[4:6], raw_date[6:8], raw_date[0:4]))

    if settings.enable_related_videos:
        related_videos_html = get_related_items_html(info)
    else:
        related_videos_html = ''

    music_list = info['music_list']
    if len(music_list) == 0:
        music_list_html = ''
    else:
        # Collect the attributes used by at least one track so that no
        # empty column (e.g. album) is rendered when no track uses it.
        used_attributes = set()
        for track in music_list:
            used_attributes.update(track.keys())

        # Fixed display order for the columns that are present.
        ordered_attributes = [
            name for name in ('Artist', 'Title', 'Album')
            if name.lower() in used_attributes
        ]

        pieces = ['''<hr>
<table>
    <caption>Music</caption>
    <tr>
''']
        # table headings
        pieces.extend(
            "<th>" + name + "</th>\n" for name in ordered_attributes)
        pieces.append('</tr>\n')

        for track in music_list:
            pieces.append('<tr>\n')
            for name in ordered_attributes:
                key = name.lower()
                if key in track:
                    pieces.append('<td>' + html.escape(track[key]) + '</td>')
                else:
                    pieces.append('<td></td>')
            pieces.append('</tr>\n')
        pieces.append('</table>\n')
        music_list_html = ''.join(pieces)

    if settings.gather_googlevideo_domains:
        domains_path = os.path.join(
            settings.data_dir, 'googlevideo-domains.txt')
        with open(domains_path, 'a+', encoding='utf-8') as domains_file:
            first_url = info['formats'][0]['url']
            subdomain = first_url[:first_url.find(".googlevideo.com")]
            domains_file.write(subdomain + "\n")

    download_options = ''.join(
        download_link_template.substitute(
            url=html.escape(fmt['url']),
            ext=html.escape(fmt['ext']),
            resolution=html.escape(downloader.format_resolution(fmt)),
            note=html.escape(downloader._format_note(fmt)),
        )
        for fmt in info['formats']
    )

    def with_thousands_separator(count):
        # Missing counters are rendered as an empty string.
        return '{:,}'.format(count) if count is not None else ""

    if info['unlisted']:
        unlisted_badge = '<span class="is-unlisted">Unlisted</span>'
    else:
        unlisted_badge = ''

    page = yt_watch_template.substitute(
        video_title=html.escape(info["title"]),
        page_title=html.escape(info["title"]),
        header=html_common.get_header(),
        uploader=html.escape(info["uploader"]),
        uploader_channel_url='/' + info["uploader_url"],
        upload_date=upload_date,
        views=with_thousands_separator(info.get("view_count")),
        likes=with_thousands_separator(info.get("like_count")),
        dislikes=with_thousands_separator(info.get("dislike_count")),
        download_options=download_options,
        video_info=html.escape(json.dumps(video_info)),
        description=html.escape(info["description"]),
        video_sources=formats_html(sorted_formats) + subtitles_html(info),
        related=related_videos_html,
        comments=comments_html,
        music_list=music_list_html,
        is_unlisted=unlisted_badge,
    )
    return page.encode('utf-8')
Exemplo n.º 6
0
def get_comments_page(env, start_response):
    """WSGI-style handler that renders a page of comments or replies.

    When the ``ctoken`` request parameter is present it selects the page
    of comments to show.  Otherwise ``video_id`` and ``parent_id`` are
    required and a ctoken for the replies to that comment is built, and
    the page is rendered as a replies page.

    Returns:
        The UTF-8 encoded HTML page.  ``start_response`` is called with
        ``200 OK`` before anything is fetched.
    """
    start_response('200 OK', [('Content-type', 'text/html')])
    parameters = env['parameters']
    ctoken = util.default_multi_get(parameters, 'ctoken', 0, default='')
    replies = not ctoken
    if replies:
        video_id = parameters['video_id'][0]
        parent_id = parameters['parent_id'][0]
        ctoken = comment_replies_ctoken(video_id, parent_id)

    raw_response = request_comments(ctoken, replies)
    comment_info = parse_comments_polymer(raw_response, replies)

    metadata = ctoken_metadata(ctoken)
    if replies:
        page_title = 'Replies'
        video_metadata = ''
        comment_box = comment_box_template.substitute(
            form_action='',
            video_id_input='',
            post_text='Post reply',
            options=comment_box_account_options())
        comment_links = ''
    else:
        current_sort = metadata['sort']
        target_video = metadata['video_id']

        # The page number is derived from the offset in steps of 20.
        page_number = str(int(metadata['offset'] / 20) + 1)
        page_title = 'Comments page ' + page_number

        video_metadata = video_metadata_template.substitute(
            page_number=page_number,
            sort='top' if current_sort == 0 else 'newest',
            title=html.escape(comment_info['video_title']),
            url=util.URL_ORIGIN + '/watch?v=' + target_video,
            thumbnail='/i.ytimg.com/vi/' + target_video + '/mqdefault.jpg',
        )
        comment_box = comment_box_template.substitute(
            form_action=util.URL_ORIGIN + '/post_comment',
            video_id_input=('''<input type="hidden" name="video_id" value="'''
                            + target_video + '''">'''),
            post_text='Post comment',
            options=comment_box_account_options(),
        )

        # Link to the same video's comments under the other sort order
        # (the sort code is flipped between 0 and 1).
        other_sort_url = (util.URL_ORIGIN + '/comments?ctoken='
                          + make_comment_ctoken(target_video,
                                                sort=1 - current_sort))
        other_sort_name = 'newest' if current_sort == 0 else 'top'
        other_sort_link = ('''<a class="sort-button" href="'''
                           + other_sort_url + '''">Sort by '''
                           + other_sort_name + '''</a>''')

        comment_links = ''.join((
            '''<div class="comment-links">\n''',
            other_sort_link + '\n',
            '''</div>''',
        ))

    comments_html = get_comments_html(comment_info['comments'])

    next_ctoken = comment_info['ctoken']
    if next_ctoken == '':
        more_comments_button = ''
    else:
        more_comments_button = more_comments_template.substitute(
            url=util.URL_ORIGIN + '/comments?ctoken=' + next_ctoken)

    comments_area = ''.join((
        '<section class="comments-area">\n',
        video_metadata + comment_box + comment_links + '\n',
        '<div class="comments">\n',
        comments_html + '\n',
        '</div>\n',
        more_comments_button + '\n',
        '</section>\n',
    ))
    return yt_comments_template.substitute(
        header=html_common.get_header(),
        comments_area=comments_area,
        page_title=page_title,
    ).encode('utf-8')
Exemplo n.º 7
0
def parse_comments_polymer(content, replies=False):
    """Parse a raw polymer comments response into comment dicts.

    Args:
        content: the response body as bytes (UTF-8 encoded JSON).
        replies: True when the response holds replies to a comment.  It is
            also switched on internally when the response only contains a
            ``commentRepliesContinuation`` section.

    Returns:
        ``{'ctoken': str, 'comments': list_of_dicts, 'video_title': str}``.
        Parsing is best-effort: any exception is printed and the result
        falls back to an empty ctoken and an empty tuple of comments, with
        whatever video title had been found up to that point.
    """
    video_title = ''
    try:
        content = json.loads(util.uppercase_escape(content.decode('utf-8')))
        request_url = content[1]['url']
        query_string = request_url[request_url.find('?') + 1:]
        ctoken = urllib.parse.parse_qs(query_string)['ctoken'][0]
        video_id = ctoken_metadata(ctoken)['video_id']

        continuation_contents = content[1]['response']['continuationContents']
        try:
            comments_raw = (
                continuation_contents['commentSectionContinuation']['items'])
        except KeyError:
            comments_raw = (
                continuation_contents['commentRepliesContinuation']['contents'])
            replies = True

        # Token for the next page; empty when there is none.
        ctoken = util.default_multi_get(
            content, 1, 'response', 'continuationContents',
            'commentSectionContinuation', 'continuations', 0,
            'nextContinuationData', 'continuation', default='')

        comments = []
        for item in comments_raw:
            replies_url = ''
            view_replies_text = ''

            if 'commentThreadRenderer' in item:
                thread = item['commentThreadRenderer']
                if 'commentTargetTitle' in thread:
                    video_title = (
                        thread['commentTargetTitle']['runs'][0]['text'])

                parent_id = thread['comment']['commentRenderer']['commentId']
                # TODO: move this stuff into the comments_html function
                if 'replies' in thread:
                    replies_url = (util.URL_ORIGIN + '/comments?parent_id='
                                   + parent_id + "&video_id=" + video_id)
                    more_text = yt_data_extract.get_plain_text(
                        thread['replies']['commentRepliesRenderer']['moreText'])
                    match = reply_count_regex.search(more_text)
                    if match is None:
                        view_replies_text = '1 reply'
                    else:
                        view_replies_text = match.group(1) + " replies"
                elif not replies:
                    view_replies_text = "Reply"
                    replies_url = (util.URL_ORIGIN + '/post_comment?parent_id='
                                   + parent_id + "&video_id=" + video_id)
                item = thread['comment']

            renderer = item['commentRenderer']
            comment = {
                'author_id': renderer.get('authorId', ''),
                'author_avatar':
                    renderer['authorThumbnail']['thumbnails'][0]['url'],
                'likes': renderer['likeCount'],
                'published': yt_data_extract.get_plain_text(
                    renderer['publishedTimeText']),
                'text': renderer['contentText'].get('runs', ''),
                'view_replies_text': view_replies_text,
                'replies_url': replies_url,
                'video_id': video_id,
                'comment_id': renderer['commentId'],
            }

            # deleted channels have no name or channel link
            if 'authorText' in renderer:
                endpoint = renderer['authorEndpoint']
                comment['author'] = yt_data_extract.get_plain_text(
                    renderer['authorText'])
                comment['author_url'] = (
                    endpoint['commandMetadata']['webCommandMetadata']['url'])
                comment['author_channel_id'] = (
                    endpoint['browseEndpoint']['browseId'])
            else:
                comment['author'] = ''
                comment['author_url'] = ''
                comment['author_channel_id'] = ''

            comments.append(comment)
    except Exception as error:
        print('Error parsing comments: ' + str(error))
        comments = ()
        ctoken = ''

    return {'ctoken': ctoken, 'comments': comments, 'video_title': video_title}
Exemplo n.º 8
0
def parse_comments_polymer(content):
    """Parse a raw polymer comments response into comments plus metadata.

    Args:
        content: the response body as bytes (UTF-8 encoded JSON).

    Returns:
        A dict with keys ``'ctoken'`` (token for the next page, ``''`` when
        there is none), ``'comments'`` (list of comment dicts),
        ``'video_title'``, and ``'video_id'``, ``'offset'``,
        ``'is_replies'`` and ``'sort'`` copied from the metadata decoded
        out of the request's ctoken.

        Parsing is best-effort: any exception is printed and the result
        falls back to an empty ctoken and an empty tuple of comments.  If
        the failure happens before the ctoken metadata could be decoded,
        the metadata fields fall back to ``''``, ``0``, ``False`` and ``0``
        respectively.  Previously that case raised ``UnboundLocalError``
        from the return statement, which defeated the best-effort handling.
    """
    video_title = ''
    # Bound before the try block so the return statement can always read
    # it, even when parsing aborts on the very first statement.
    # NOTE(review): the fallback values are neutral placeholders; confirm
    # they suit the callers.
    metadata = {'video_id': '', 'offset': 0, 'is_replies': False, 'sort': 0}
    try:
        content = json.loads(util.uppercase_escape(content.decode('utf-8')))
        url = content[1]['url']
        ctoken = urllib.parse.parse_qs(url[url.find('?')+1:])['ctoken'][0]
        metadata = ctoken_metadata(ctoken)

        try:
            comments_raw = content[1]['response']['continuationContents']['commentSectionContinuation']['items']
        except KeyError:
            comments_raw = content[1]['response']['continuationContents']['commentRepliesContinuation']['contents']

        # Token for the next page; empty when there is none.
        ctoken = util.default_multi_get(content, 1, 'response', 'continuationContents', 'commentSectionContinuation', 'continuations', 0, 'nextContinuationData', 'continuation', default='')

        comments = []
        for comment_json in comments_raw:
            number_of_replies = 0
            try:
                comment_thread = comment_json['commentThreadRenderer']
            except KeyError:
                # Not wrapped in a thread: the item is the comment itself.
                comment_renderer = comment_json['commentRenderer']
            else:
                if 'commentTargetTitle' in comment_thread:
                    video_title = comment_thread['commentTargetTitle']['runs'][0]['text']

                if 'replies' in comment_thread:
                    view_replies_text = yt_data_extract.get_plain_text(comment_thread['replies']['commentRepliesRenderer']['moreText'])
                    # Strip thousands separators before extracting the count.
                    view_replies_text = view_replies_text.replace(',', '')
                    match = re.search(r'(\d+)', view_replies_text)
                    if match is None:
                        # No number in the text is treated as one reply.
                        number_of_replies = 1
                    else:
                        number_of_replies = int(match.group(1))
                comment_renderer = comment_thread['comment']['commentRenderer']

            comment = {
                'author_id': comment_renderer.get('authorId', ''),
                'author_avatar': comment_renderer['authorThumbnail']['thumbnails'][0]['url'],
                'likes': comment_renderer['likeCount'],
                'published': yt_data_extract.get_plain_text(comment_renderer['publishedTimeText']),
                'text': comment_renderer['contentText'].get('runs', ''),
                'number_of_replies': number_of_replies,
                'comment_id': comment_renderer['commentId'],
            }

            if 'authorText' in comment_renderer:     # deleted channels have no name or channel link
                comment['author'] = yt_data_extract.get_plain_text(comment_renderer['authorText'])
                comment['author_url'] = comment_renderer['authorEndpoint']['commandMetadata']['webCommandMetadata']['url']
                comment['author_channel_id'] = comment_renderer['authorEndpoint']['browseEndpoint']['browseId']
            else:
                comment['author'] = ''
                comment['author_url'] = ''
                comment['author_channel_id'] = ''

            comments.append(comment)
    except Exception as e:
        print('Error parsing comments: ' + str(e))
        comments = ()
        ctoken = ''

    return {
        'ctoken': ctoken,
        'comments': comments,
        'video_title': video_title,
        'video_id': metadata['video_id'],
        'offset': metadata['offset'],
        'is_replies': metadata['is_replies'],
        'sort': metadata['sort'],
    }