def get_channel_page_general_url(base_url, tab, request):
    page_number = int(request.args.get('page', 1))
    sort = request.args.get('sort', '3')
    view = request.args.get('view', '1')
    query = request.args.get('query', '')

    if tab == 'videos':
        polymer_json = util.fetch_url(base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1, debug_name='gen_channel_videos')
    elif tab == 'about':
        polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1, debug_name='gen_channel_about')
    elif tab == 'playlists':
        polymer_json = util.fetch_url(base_url + '/playlists?pbj=1&view=1', util.desktop_ua + headers_1, debug_name='gen_channel_playlists')
    elif tab == 'search':
        raise NotImplementedError()
    else:
        flask.abort(404, 'Unknown channel tab: ' + tab)

    info = extract_info(json.loads(polymer_json), tab)
    post_process_channel_info(info)
    if tab in ('videos', 'search'):
        info['number_of_videos'] = 1000
        info['number_of_pages'] = math.ceil(1000/30)

    info['header_playlist_names'] = local_playlist.get_playlist_names()
    if tab in ('videos', 'playlists'):
        info['current_sort'] = sort
    elif tab == 'search':
        info['search_box_value'] = query

    return flask.render_template('channel.html',
        parameters_dictionary = request.args,
        **info
    )

def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1, print_status=True): message = 'Got channel tab' if print_status else None if int(sort) == 2 and int(page) > 1: ctoken = channel_ctoken_v1(channel_id, page, sort, tab, view) ctoken = ctoken.replace('=', '%3D') url = ('https://www.youtube.com/channel/' + channel_id + '/' + tab + '?action_continuation=1&continuation=' + ctoken + '&pbj=1') content = util.fetch_url(url, headers_desktop + real_cookie, debug_name='channel_tab', report_text=message) else: ctoken = channel_ctoken_v3(channel_id, page, sort, tab, view) ctoken = ctoken.replace('=', '%3D') url = 'https://www.youtube.com/browse_ajax?ctoken=' + ctoken content = util.fetch_url(url, headers_desktop + generic_cookie, debug_name='channel_tab', report_text=message) return content
def get_channel_page_general_url(base_url, tab, request, channel_id=None):
    page_number = int(request.args.get('page', 1))
    sort = request.args.get('sort', '3')
    view = request.args.get('view', '1')
    query = request.args.get('query', '')

    if tab == 'videos' and channel_id:
        tasks = (
            gevent.spawn(get_number_of_videos_channel, channel_id),
            gevent.spawn(get_channel_tab, channel_id, page_number, sort, 'videos', view)
        )
        gevent.joinall(tasks)
        util.check_gevent_exceptions(*tasks)
        number_of_videos, polymer_json = tasks[0].value, tasks[1].value
    elif tab == 'videos':
        tasks = (
            gevent.spawn(get_number_of_videos_general, base_url),
            gevent.spawn(util.fetch_url, base_url + '/videos?pbj=1&view=0', headers_desktop, debug_name='gen_channel_videos')
        )
        gevent.joinall(tasks)
        util.check_gevent_exceptions(*tasks)
        number_of_videos, polymer_json = tasks[0].value, tasks[1].value
    elif tab == 'about':
        polymer_json = util.fetch_url(base_url + '/about?pbj=1', headers_desktop, debug_name='gen_channel_about')
    elif tab == 'playlists':
        polymer_json = util.fetch_url(base_url + '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], headers_desktop, debug_name='gen_channel_playlists')
    elif tab == 'search' and channel_id:
        polymer_json = get_channel_search_json(channel_id, query, page_number)
    elif tab == 'search':
        url = base_url + '/search?pbj=1&query=' + urllib.parse.quote(query, safe='')
        polymer_json = util.fetch_url(url, headers_desktop, debug_name='gen_channel_search')
    else:
        flask.abort(404, 'Unknown channel tab: ' + tab)

    info = yt_data_extract.extract_channel_info(json.loads(polymer_json), tab)
    if info['error'] is not None:
        return flask.render_template('error.html', error_message = info['error'])

    post_process_channel_info(info)
    if tab == 'videos':
        info['number_of_videos'] = number_of_videos
        info['number_of_pages'] = math.ceil(number_of_videos/30)
        info['header_playlist_names'] = local_playlist.get_playlist_names()
    if tab in ('videos', 'playlists'):
        info['current_sort'] = sort
    elif tab == 'search':
        info['search_box_value'] = query
        info['header_playlist_names'] = local_playlist.get_playlist_names()
    info['page_number'] = page_number
    info['subscribed'] = subscriptions.is_subscribed(info['channel_id'])

    return flask.render_template('channel.html',
        parameters_dictionary = request.args,
        **info
    )

def test_exit_node_retry(monkeypatch, new_identities_till_success):
    new_identity_state = NewIdentityState(new_identities_till_success)
    # https://docs.pytest.org/en/stable/monkeypatch.html
    monkeypatch.setattr(settings, 'route_tor', 1)
    monkeypatch.setattr(util, 'tor_manager', util.TorManager())  # fresh one
    MockController.signal = new_identity_state.new_identity
    monkeypatch.setattr(stem.control, 'Controller', MockController)
    monkeypatch.setattr(util, 'fetch_url_response',
                        new_identity_state.fetch_url_response)

    if new_identities_till_success <= NewIdentityState.MAX_TRIES:
        assert util.fetch_url('url') == b'success'
    else:
        with pytest.raises(util.FetchError) as excinfo:
            util.fetch_url('url')
        assert int(excinfo.value.code) == 429

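# A plausible way new_identities_till_success could be supplied (an
# assumption; the real suite may define it differently): parametrize with
# values on both sides of NewIdentityState.MAX_TRIES so both the success
# branch and the 429 FetchError branch get exercised.
#
# @pytest.mark.parametrize('new_identities_till_success', [1, 2, 100])
# def test_exit_node_retry(monkeypatch, new_identities_till_success): ...
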
def get_channel_page(env, start_response):
    path_parts = env['path_parts']
    channel_id = path_parts[1]
    try:
        tab = path_parts[2]
    except IndexError:
        tab = 'videos'

    parameters = env['parameters']
    page_number = int(util.default_multi_get(parameters, 'page', 0, default='1'))
    sort = util.default_multi_get(parameters, 'sort', 0, default='3')
    view = util.default_multi_get(parameters, 'view', 0, default='1')
    query = util.default_multi_get(parameters, 'query', 0, default='')

    if tab == 'videos':
        tasks = (
            gevent.spawn(get_number_of_videos, channel_id),
            gevent.spawn(get_channel_tab, channel_id, page_number, sort, 'videos', view)
        )
        gevent.joinall(tasks)
        number_of_videos, polymer_json = tasks[0].value, tasks[1].value

        result = channel_videos_html(polymer_json, page_number, sort, number_of_videos, env['QUERY_STRING'])
    elif tab == 'about':
        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', util.desktop_ua + headers_1)
        polymer_json = json.loads(polymer_json)
        result = channel_about_page(polymer_json)
    elif tab == 'playlists':
        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], util.desktop_ua + headers_1)
        '''with open('debug/channel_playlists_debug', 'wb') as f:
            f.write(polymer_json)'''
        polymer_json = json.loads(polymer_json)
        result = channel_playlists_html(polymer_json, sort)
    elif tab == 'search':
        tasks = (
            gevent.spawn(get_number_of_videos, channel_id),
            gevent.spawn(get_channel_search_json, channel_id, query, page_number)
        )
        gevent.joinall(tasks)
        number_of_videos, polymer_json = tasks[0].value, tasks[1].value

        result = channel_search_page(polymer_json, query, page_number, number_of_videos, env['QUERY_STRING'])
    else:
        start_response('404 Not Found', [('Content-type', 'text/plain'),])
        return b'Unknown channel tab: ' + tab.encode('utf-8')

    start_response('200 OK', [('Content-type', 'text/html'),])
    return result.encode('utf-8')

def get_channel_page(channel_id, tab='videos'):
    page_number = int(request.args.get('page', 1))
    sort = request.args.get('sort', '3')
    view = request.args.get('view', '1')
    query = request.args.get('query', '')

    if tab == 'videos':
        tasks = (
            gevent.spawn(get_number_of_videos, channel_id),
            gevent.spawn(get_channel_tab, channel_id, page_number, sort, 'videos', view)
        )
        gevent.joinall(tasks)
        number_of_videos, polymer_json = tasks[0].value, tasks[1].value
    elif tab == 'about':
        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', util.desktop_ua + headers_1, debug_name='channel_about')
    elif tab == 'playlists':
        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], util.desktop_ua + headers_1, debug_name='channel_playlists')
    elif tab == 'search':
        tasks = (
            gevent.spawn(get_number_of_videos, channel_id),
            gevent.spawn(get_channel_search_json, channel_id, query, page_number)
        )
        gevent.joinall(tasks)
        number_of_videos, polymer_json = tasks[0].value, tasks[1].value
    else:
        flask.abort(404, 'Unknown channel tab: ' + tab)

    info = extract_info(json.loads(polymer_json), tab)
    post_process_channel_info(info)
    if tab in ('videos', 'search'):
        info['number_of_videos'] = number_of_videos
        info['number_of_pages'] = math.ceil(number_of_videos/30)

    info['header_playlist_names'] = local_playlist.get_playlist_names()
    if tab in ('videos', 'playlists'):
        info['current_sort'] = sort
    elif tab == 'search':
        info['search_box_value'] = query

    return flask.render_template('channel.html',
        parameters_dictionary = request.args,
        **info
    )

def serve_subscription_thumbnail(thumbnail):
    '''Serves thumbnail from disk if it's been saved already. If not,
    downloads the thumbnail, saves it to disk, and serves it.'''
    assert thumbnail[-4:] == '.jpg'
    video_id = thumbnail[0:-4]
    thumbnail_path = os.path.join(thumbnails_directory, thumbnail)

    if video_id in existing_thumbnails:
        try:
            f = open(thumbnail_path, 'rb')
        except FileNotFoundError:
            existing_thumbnails.remove(video_id)
        else:
            image = f.read()
            f.close()
            return flask.Response(image, mimetype='image/jpeg')

    url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
    try:
        image = util.fetch_url(url, report_text="Saved thumbnail: " + video_id)
    except urllib.error.HTTPError as e:
        print("Failed to download thumbnail for " + video_id + ": " + str(e))
        abort(e.code)

    try:
        f = open(thumbnail_path, 'wb')
    except FileNotFoundError:
        os.makedirs(thumbnails_directory, exist_ok=True)
        f = open(thumbnail_path, 'wb')
    f.write(image)
    f.close()
    existing_thumbnails.add(video_id)

    return flask.Response(image, mimetype='image/jpeg')

def decrypt_signatures(info):
    '''return error string, or False if no errors'''
    if not yt_data_extract.requires_decryption(info):
        return False
    if not info['player_name']:
        return 'Could not find player name'
    if not info['base_js']:
        return 'Failed to find base.js'

    player_name = info['player_name']
    if player_name in decrypt_cache:
        print('Using cached decryption function for: ' + player_name)
        info['decryption_function'] = decrypt_cache[player_name]
    else:
        base_js = util.fetch_url(info['base_js'], debug_name='base.js',
                                 report_text='Fetched player ' + player_name)
        base_js = base_js.decode('utf-8')
        err = yt_data_extract.extract_decryption_function(info, base_js)
        if err:
            return err
        decrypt_cache[player_name] = info['decryption_function']
        save_decrypt_cache()
    err = yt_data_extract.decrypt_signatures(info)
    return err

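# Hypothetical call-site sketch (the names below are assumptions, not
# this module's API): per the docstring, decrypt_signatures returns an
# error string on failure and False on success, so callers branch on
# truthiness.
#
#     err = decrypt_signatures(info)
#     if err:
#         info['playability_error'] = err  # hypothetical field
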
def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1,
                    ctoken=None, print_status=True):
    message = 'Got channel tab' if print_status else None

    if not ctoken:
        ctoken = channel_ctoken_v3(channel_id, page, sort, tab, view)
        ctoken = ctoken.replace('=', '%3D')

    # Not sure what the purpose of the key is or whether it will change.
    # For now it seems to be constant for the API endpoint, not dependent
    # on the browsing session or channel.
    key = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
    url = 'https://www.youtube.com/youtubei/v1/browse?key=' + key

    data = {
        'context': {
            'client': {
                'hl': 'en',
                'gl': 'US',
                'clientName': 'WEB',
                'clientVersion': '2.20180830',
            },
        },
        'continuation': ctoken,
    }

    content_type_header = (('Content-Type', 'application/json'),)
    content = util.fetch_url(
        url, headers_desktop + content_type_header,
        data=json.dumps(data), debug_name='channel_tab', report_text=message)

    return content

def get_channel_search_json(channel_id, query, page):
    offset = proto.unpadded_b64encode(proto.uint(3, (page-1)*30))
    params = proto.string(2, 'search') + proto.string(15, offset)
    params = proto.percent_b64encode(params)
    ctoken = proto.string(2, channel_id) + proto.string(3, params) + proto.string(11, query)
    ctoken = base64.urlsafe_b64encode(proto.nested(80226972, ctoken)).decode('ascii')

    key = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
    url = 'https://www.youtube.com/youtubei/v1/browse?key=' + key

    data = {
        'context': {
            'client': {
                'hl': 'en',
                'gl': 'US',
                'clientName': 'WEB',
                'clientVersion': '2.20180830',
            },
        },
        'continuation': ctoken,
    }

    content_type_header = (('Content-Type', 'application/json'),)
    polymer_json = util.fetch_url(
        url, headers_desktop + content_type_header,
        data=json.dumps(data), debug_name='channel_search')

    return polymer_json

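# A minimal sketch, assuming standard protobuf wire format, of what the
# project's proto.uint / proto.string helpers plausibly produce. The
# names below are stand-ins, not the module's actual implementation;
# proto.nested would be the same length-delimited encoding applied to
# already-encoded submessage bytes.
def _varint_sketch(n):
    # base-128 varint, least significant 7-bit group first
    out = b''
    while True:
        byte = n & 0x7f
        n >>= 7
        if n:
            out += bytes((byte | 0x80,))
        else:
            return out + bytes((byte,))

def _uint_field_sketch(field_number, value):
    # wire type 0 (varint): tag, then the value itself as a varint
    return _varint_sketch(field_number << 3) + _varint_sketch(value)

def _string_field_sketch(field_number, data):
    # wire type 2 (length-delimited): tag, payload length, payload
    if isinstance(data, str):
        data = data.encode('utf-8')
    return (_varint_sketch((field_number << 3) | 2)
            + _varint_sketch(len(data)) + data)
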
def get_number_of_videos(channel_id):
    # Uploads playlist
    playlist_id = 'UU' + channel_id[2:]
    url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
    print("Getting number of videos")

    # Sometimes retrieving playlist info fails with a 403 for no discernible reason
    try:
        response = util.fetch_url(url, util.mobile_ua + headers_pbj)
    except urllib.error.HTTPError as e:
        if e.code != 403:
            raise
        print("Couldn't retrieve number of videos")
        return 1000
    '''with open('debug/playlist_debug_metadata', 'wb') as f:
        f.write(response)'''
    response = response.decode('utf-8')
    print("Got response for number of videos")

    match = re.search(r'"numVideosText":\s*{\s*"runs":\s*\[{"text":\s*"([\d,]*) videos"', response)
    if match:
        return int(match.group(1).replace(',', ''))
    else:
        return 0

def req(url, f_req, note, errnote):
    data = login_form.copy()
    data.update({
        'pstMsg': 1,
        'checkConnection': 'youtube',
        'checkedDomains': 'youtube',
        'hl': 'en',
        'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
        'f.req': json.dumps(f_req),
        'flowName': 'GlifWebSignIn',
        'flowEntry': 'ServiceLogin',
        'bgRequest': '["identifier",""]',
    })
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
        'Google-Accounts-XSRF': 1,
    }
    headers.update(yt_dl_headers)
    result = util.fetch_url(url, headers, report_text=note, data=data,
                            cookiejar_send=cookiejar, cookiejar_receive=cookiejar,
                            use_tor=use_tor, debug_name=note).decode('utf-8')
    # strip everything before the first '[' so only the JSON array remains
    result = re.sub(r'^[^\[]*', '', result)
    return json.loads(result)

def request_comments(ctoken, replies=False):
    if replies:  # let's make it use different urls for no reason despite all the data being encoded
        base_url = "https://m.youtube.com/watch_comment?action_get_comment_replies=1&ctoken="
    else:
        base_url = "https://m.youtube.com/watch_comment?action_get_comments=1&ctoken="
    url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"

    for i in range(0, 8):  # don't retry more than 8 times
        content = util.fetch_url(url, headers=mobile_headers, report_text="Retrieved comments")
        if content[0:4] == b")]}'":  # random closing characters included at beginning of response for some reason
            content = content[4:]
        elif content[0:10] == b'\n<!DOCTYPE':  # occasionally returns html instead of json for no reason
            content = b''
            print("got <!DOCTYPE>, retrying")
            continue
        break
    '''with open('debug/comments_debug', 'wb') as f:
        f.write(content)'''
    return content

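# A standalone restatement of the cleanup above (strip_security_prefix is
# a hypothetical name, not part of this module): the endpoint sometimes
# prefixes its JSON with b")]}'", a common anti-XSSI guard, so it has to
# be removed before json.loads can parse the body.
def strip_security_prefix(content):
    if content[0:4] == b")]}'":
        return content[4:]
    return content
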
def get_number_of_videos_channel(channel_id):
    if channel_id is None:
        return 1000

    # Uploads playlist
    playlist_id = 'UU' + channel_id[2:]
    url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'

    try:
        response = util.fetch_url(url, headers_mobile,
                                  debug_name='number_of_videos',
                                  report_text='Got number of videos')
    except urllib.error.HTTPError as e:
        traceback.print_exc()
        print("Couldn't retrieve number of videos")
        return 1000

    response = response.decode('utf-8')
    match = re.search(
        r'"numVideosText":\s*{\s*"runs":\s*\[{"text":\s*"([\d,]*) videos"',
        response)
    if match:
        return int(match.group(1).replace(',', ''))
    else:
        return 0

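# Self-contained sanity check of the numVideosText pattern above; the
# fragment is a fabricated example of the playlist response shape, not
# captured output.
import re

_fragment = '"numVideosText": {"runs": [{"text": "1,234 videos"'
_m = re.search(
    r'"numVideosText":\s*{\s*"runs":\s*\[{"text":\s*"([\d,]*) videos"',
    _fragment)
assert _m and int(_m.group(1).replace(',', '')) == 1234
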
def get_transcript(caption_path):
    try:
        captions = util.fetch_url(
            'https://www.youtube.com/' + caption_path + '?'
            + request.environ['QUERY_STRING']).decode('utf-8')
    except util.FetchError as e:
        msg = ('Error retrieving captions: ' + str(e) + '\n\n'
               + 'The caption url may have expired.')
        print(msg)
        return flask.Response(
            msg, status=e.code,
            mimetype='text/plain;charset=UTF-8')

    lines = captions.splitlines()
    segments = []

    # skip captions file header
    i = 0
    while lines[i] != '':
        i += 1

    current_segment = None
    while i < len(lines):
        line = lines[i]
        if line == '':
            if ((current_segment is not None)
                    and (current_segment['begin'] is not None)):
                segments.append(current_segment)
            current_segment = {
                'begin': None,
                'end': None,
                'lines': [],
            }
        elif times_reg.fullmatch(line.rstrip()):
            current_segment['begin'], current_segment['end'] = line.split(' --> ')
        else:
            current_segment['lines'].append(
                inner_timestamp_removal_reg.sub('', line))
        i += 1

    # if automatic captions, but not translated
    if request.args.get('kind') == 'asr' and not request.args.get('tlang'):
        # Automatic captions repeat content. The new segment is displayed
        # on the bottom row; the old one is displayed on the top row.
        # So grab the bottom row only
        for seg in segments:
            seg['text'] = seg['lines'][1]
    else:
        for seg in segments:
            seg['text'] = ' '.join(map(str.rstrip, seg['lines']))

    result = ''
    for seg in segments:
        if seg['text'] != ' ':
            result += seg['begin'] + ' ' + seg['text'] + '\r\n'

    return flask.Response(result.encode('utf-8'),
                          mimetype='text/plain;charset=UTF-8')

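# A minimal, fabricated example of the WebVTT-style input the parser
# above expects: a header block, then blank-line-separated cues whose
# first line is a timing row matching times_reg.
SAMPLE_CAPTIONS = (
    'WEBVTT\n'
    'Kind: captions\n'
    'Language: en\n'
    '\n'
    '00:00:00.000 --> 00:00:02.500\n'
    'first caption line\n'
    '\n'
    '00:00:02.500 --> 00:00:05.000\n'
    'second caption line\n'
)
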
def _get_atoma_feed(channel_id):
    url = 'https://www.youtube.com/feeds/videos.xml?channel_id=' + channel_id
    try:
        return util.fetch_url(url).decode('utf-8')
    except util.FetchError as e:
        # 404 is expected for terminated channels
        if e.code in ('404', '429'):
            return ''
        raise

def get_channel_search_json(channel_id, query, page):
    params = proto.string(2, 'search') + proto.string(15, str(page))
    params = proto.percent_b64encode(params)
    ctoken = proto.string(2, channel_id) + proto.string(3, params) + proto.string(11, query)
    ctoken = base64.urlsafe_b64encode(proto.nested(80226972, ctoken)).decode('ascii')

    polymer_json = util.fetch_url("https://www.youtube.com/browse_ajax?ctoken=" + ctoken, util.desktop_ua + headers_1, debug_name='channel_search')

    return polymer_json

def playlist_first_page(playlist_id, report_text="Retrieved playlist"):
    url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
    content = util.fetch_url(url, util.mobile_ua + headers_1,
                             report_text=report_text, debug_name='playlist_first_page')
    content = json.loads(util.uppercase_escape(content.decode('utf-8')))
    return content

def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1):
    ctoken = channel_ctoken(channel_id, page, sort, tab, view).replace('=', '%3D')
    url = "https://www.youtube.com/browse_ajax?ctoken=" + ctoken

    print("Sending channel tab ajax request")
    content = util.fetch_url(url, util.desktop_ua + headers_1, debug_name='channel_tab')
    print("Finished receiving channel tab response")

    return content

def get_channel_id(base_url):
    # This method gives the smallest possible response at ~4 kb
    # and needs to be as fast as possible
    base_url = base_url.replace('https://www', 'https://m')  # avoid redirect
    response = util.fetch_url(base_url + '/about?pbj=1', headers_mobile,
                              debug_name='get_channel_id',
                              report_text='Got channel id').decode('utf-8')
    match = channel_id_re.search(response)
    if match:
        return match.group(1)
    return None

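# channel_id_re is defined elsewhere in the module. A plausible stand-in
# (an assumption, not necessarily the project's actual pattern) that
# captures the 24-character UC... id from the about-page response:
import re
channel_id_re_sketch = re.compile(r'"channelId":\s*"(UC[A-Za-z0-9_-]{22})"')
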
def playlist_first_page(playlist_id, report_text="Retrieved playlist"):
    url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
    content = util.fetch_url(url, util.mobile_ua + headers_1, report_text=report_text)
    '''with open('debug/playlist_debug', 'wb') as f:
        f.write(content)'''
    content = json.loads(util.uppercase_escape(content.decode('utf-8')))
    return content

def get_channel_search_json(channel_id, query, page):
    params = proto.string(2, 'search') + proto.string(15, str(page))
    params = proto.percent_b64encode(params)
    ctoken = proto.string(2, channel_id) + proto.string(3, params) + proto.string(11, query)
    ctoken = base64.urlsafe_b64encode(proto.nested(80226972, ctoken)).decode('ascii')

    polymer_json = util.fetch_url("https://www.youtube.com/browse_ajax?ctoken=" + ctoken, util.desktop_ua + headers_1)
    '''with open('debug/channel_search_debug', 'wb') as f:
        f.write(polymer_json)'''
    polymer_json = json.loads(polymer_json)

    return polymer_json

def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1):
    ctoken = channel_ctoken(channel_id, page, sort, tab, view).replace('=', '%3D')
    url = "https://www.youtube.com/browse_ajax?ctoken=" + ctoken

    print("Sending channel tab ajax request")
    content = util.fetch_url(url, util.desktop_ua + headers_1)
    print("Finished receiving channel tab response")
    '''with open('debug/channel_debug', 'wb') as f:
        f.write(content)'''
    info = json.loads(content)

    return info

def get_channel_page_general_url(env, start_response):
    path_parts = env['path_parts']
    is_toplevel = not path_parts[0] in ('user', 'c')

    if len(path_parts) + int(is_toplevel) == 3:    # has /[page] after it
        page = path_parts[2]
        base_url = 'https://www.youtube.com/' + '/'.join(path_parts[0:-1])
    elif len(path_parts) + int(is_toplevel) == 2:  # does not have /[page] after it, use /videos by default
        page = 'videos'
        base_url = 'https://www.youtube.com/' + '/'.join(path_parts)
    else:
        start_response('404 Not Found', [('Content-type', 'text/plain'),])
        return b'Invalid channel url'

    if page == 'videos':
        polymer_json = util.fetch_url(base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1)
        '''with open('debug/user_page_videos', 'wb') as f:
            f.write(polymer_json)'''
        polymer_json = json.loads(polymer_json)
        result = channel_videos_html(polymer_json)
    elif page == 'about':
        polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1)
        polymer_json = json.loads(polymer_json)
        result = channel_about_page(polymer_json)
    elif page == 'playlists':
        polymer_json = util.fetch_url(base_url + '/playlists?pbj=1&view=1', util.desktop_ua + headers_1)
        polymer_json = json.loads(polymer_json)
        result = channel_playlists_html(polymer_json)
    elif page == 'search':
        raise NotImplementedError()
        '''polymer_json = util.fetch_url('https://www.youtube.com/user' + username + '/search?pbj=1&' + query_string, util.desktop_ua + headers_1)
        polymer_json = json.loads(polymer_json)
        return channel_search_page('''
    else:
        start_response('404 Not Found', [('Content-type', 'text/plain'),])
        return b'Unknown channel page: ' + page.encode('utf-8')

    start_response('200 OK', [('Content-type', 'text/html'),])
    return result.encode('utf-8')

def get_videos(playlist_id, page):
    url = "https://m.youtube.com/playlist?ctoken=" + playlist_ctoken(playlist_id, (int(page)-1)*20) + "&pbj=1"
    headers = {
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'X-YouTube-Client-Name': '2',
        'X-YouTube-Client-Version': '2.20180508',
    }
    content = util.fetch_url(url, headers, report_text="Retrieved playlist", debug_name='playlist_videos')

    info = json.loads(util.uppercase_escape(content.decode('utf-8')))
    return info

def download_thumbnail(playlist_name, video_id):
    url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
    save_location = os.path.join(thumbnails_directory, playlist_name, video_id + ".jpg")
    try:
        thumbnail = util.fetch_url(url, report_text="Saved local playlist thumbnail: " + video_id)
    except urllib.error.HTTPError as e:
        print("Failed to download thumbnail for " + video_id + ": " + str(e))
        return
    try:
        f = open(save_location, 'wb')
    except FileNotFoundError:
        # exist_ok guards against a concurrent download creating the directory first
        os.makedirs(os.path.join(thumbnails_directory, playlist_name), exist_ok=True)
        f = open(save_location, 'wb')
    f.write(thumbnail)
    f.close()

def request_comments(ctoken, replies=False):
    base_url = 'https://m.youtube.com/watch_comment?'
    if replies:
        base_url += 'action_get_comment_replies=1&ctoken='
    else:
        base_url += 'action_get_comments=1&ctoken='
    url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"

    content = util.fetch_url(url, headers=mobile_headers,
                             report_text='Retrieved comments',
                             debug_name='request_comments')
    content = content.decode('utf-8')

    polymer_json = json.loads(content)
    return polymer_json

def _post_comment_reply(text, video_id, parent_comment_id, session_token, cookiejar):
    headers = {
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'X-YouTube-Client-Name': '2',
        'X-YouTube-Client-Version': '2.20180823',
        'Content-Type': 'application/x-www-form-urlencoded',
    }

    comment_params = proto.string(2, video_id) + proto.string(4, parent_comment_id) + proto.nested(5, proto.uint(1, 0)) + proto.uint(6, 0) + proto.uint(10, 1)
    comment_params = proto.percent_b64encode(comment_params).decode('ascii')

    sej = json.dumps({
        "clickTrackingParams": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
        "commandMetadata": {
            "webCommandMetadata": {
                "url": "/service_ajax",
                "sendPost": True
            }
        },
        "createCommentReplyEndpoint": {
            "createReplyParams": comment_params
        }
    })

    data_dict = {
        'comment_text': text,
        'sej': sej,
        'session_token': session_token,
    }
    data = urllib.parse.urlencode(data_dict).encode()

    content = util.fetch_url(
        "https://m.youtube.com/service_ajax?name=createCommentReplyEndpoint",
        headers=headers, data=data, cookiejar_send=cookiejar)

    code = json.loads(content)['code']
    print("Comment posting code: " + code)
    return code

def get_session_token(video_id, cookiejar):
    '''Get the session token for a video. This is required in order to
    post/edit/delete comments. This will modify cookiejar with cookies
    from youtube required for commenting.'''
    # youtube-dl uses disable_polymer=1, which uses a different request
    # format with an obfuscated javascript algorithm to generate a
    # parameter called "bgr". Tokens retrieved from disable_polymer pages
    # only work with that format, and tokens retrieved on mobile only
    # work using mobile requests.
    # Additionally, tokens retrieved without sending the same cookie won't
    # work. So this is necessary even if the bgr and stuff was reverse
    # engineered.
    headers = {'User-Agent': util.mobile_user_agent}
    mobile_page = util.fetch_url(
        'https://m.youtube.com/watch?v=' + video_id, headers,
        report_text="Retrieved session token for comment",
        cookiejar_send=cookiejar, cookiejar_receive=cookiejar).decode()
    match = xsrf_token_regex.search(mobile_page)
    if match:
        return match.group(1).replace("%3D", "=")
    else:
        raise Exception("Couldn't find xsrf_token")

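# xsrf_token_regex is defined elsewhere. A plausible stand-in (an
# assumption, not the project's actual pattern); the
# .replace("%3D", "=") above suggests the captured token is
# percent-encoded base64.
import re
xsrf_token_regex_sketch = re.compile(r'"XSRF_TOKEN"\s*:\s*"([\w\-%=]+)"')
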
def proxy_site(env, start_response):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
        'Accept': '*/*',
    }
    url = "https://" + env['SERVER_NAME'] + env['PATH_INFO']
    if env['QUERY_STRING']:
        url += '?' + env['QUERY_STRING']

    content, response = util.fetch_url(url, headers, return_response=True)

    headers = response.getheaders()
    if isinstance(headers, urllib3._collections.HTTPHeaderDict):
        headers = headers.items()

    start_response('200 OK', headers)
    return content