def _real_extract(self, url, allowVP9=False, allowAgeGate=False):
    """
    Resolve a YouTube (or Google-Docs hosted) page URL into a list of
    playable stream descriptors.

    Strategy: query the InnerTube player API with an ANDROID client
    context; if that HTTP call fails, fall back to scraping the watch
    page for the initial player response.  Streams protected by an
    encrypted signature ('esign') are collected and decrypted in one
    batch with CYTSignAlgoExtractor using the site's JS player.

    :param url: watch-page (or docs.google.com) URL, possibly wrapped in
        a next_url redirection (e.g. age verification).
    :param allowVP9: passed through to self._get_video_url_list() to
        allow VP9 itags.
    :param allowAgeGate: when True and the player response reports
        LOGIN_REQUIRED, retry with an EMBED client-screen context.
    :return: list of dicts ('id', 'url', 'ext', 'format', 'duration',
        'm3u8', ...); empty list on failure.
    :raises ExtractorError: when neither the page nor the player
        response can be obtained.
    """
    # Extract original video URL from URL with redirection, like age
    # verification, using next_url parameter.
    mobj = re.search(self._NEXT_URL_RE, url)
    if mobj:
        # https
        url = 'http://www.youtube.com/' + compat_urllib_parse.unquote(
            mobj.group(1)).lstrip('/')
    video_id = self._extract_id(url)
    player_response = None
    if 'yt-video-id' == video_id:
        # Google Docs hosted video: the real id is the docid parameter.
        video_id = self.cm.ph.getSearchGroups(
            url + '&', '[\?&]docid=([^\?^&]+)[\?&]')[0]
        isGoogleDoc = True
        videoKey = 'docid'
        COOKIE_FILE = GetCookieDir('docs.google.com.cookie')
        # NOTE(review): these cookie params are prepared but the getPage
        # call below does not pass them -- looks like a leftover; verify.
        videoInfoparams = {
            'cookiefile': COOKIE_FILE,
            'use_cookie': True,
            'load_cookie': False,
            'save_cookie': True
        }
        sts, video_webpage = self.cm.getPage(url)
    else:
        # InnerTube player endpoint, queried as the ANDROID client.
        url = 'https://www.youtube.com/youtubei/v1/player?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
        isGoogleDoc = False
        videoKey = 'video_id'
        videoInfoparams = {}
        http_params = {
            'header': {
                'Content-Type': 'application/json',
                'Origin': 'https://www.youtube.com',
                'X-YouTube-Client-Name': '3',
                'X-YouTube-Client-Version': '16.20'
            }
        }
        http_params['raw_post_data'] = True
        post_data = "{'videoId': '%s', 'context': {'client': {'hl': 'en', 'clientVersion': '16.20', 'clientName': 'ANDROID'}}}" % video_id
        sts, video_webpage = self.cm.getPage(url, http_params, post_data)
        if sts:
            if allowAgeGate and 'LOGIN_REQUIRED' in video_webpage:
                # Retry as an embedded third-party player to bypass the
                # age gate.
                post_data = "{'videoId': '%s', 'thirdParty': 'https://google.com', 'context': {'client': {'hl': 'en', 'clientScreen': 'EMBED', 'clientVersion': '16.20', 'clientName': 'ANDROID'}}}" % video_id
                sts, video_webpage = self.cm.getPage(
                    url, http_params, post_data)
            player_response = json_loads(video_webpage)
        else:
            # API call failed: scrape the watch page instead.
            url = 'http://www.youtube.com/watch?v=%s&bpctr=9999999999&has_verified=1&' % video_id
            sts, video_webpage = self.cm.getPage(url)
            if sts:
                player_response = self._extract_yt_initial_variable(
                    video_webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                    video_id, 'initial player response')
    if not sts:
        raise ExtractorError('Unable to download video webpage')
    if not player_response:
        raise ExtractorError('Unable to get player response')
    video_info = player_response['videoDetails']
    # Duration (seconds, as a string) if the API provided it.
    if 'lengthSeconds' not in video_info:
        video_duration = ''
    else:
        video_duration = video_info['lengthSeconds']
    url_map = {}
    video_url_list = {}
    try:
        is_m3u8 = 'no'
        url_data_str = player_response['streamingData']['formats']
        try:
            url_data_str += player_response['streamingData'][
                'adaptiveFormats']
        except Exception:
            printExc()
        for url_data in url_data_str:
            printDBG(str(url_data))
            if 'url' in url_data:
                url_item = {'url': url_data['url']}
            else:
                # Signature-protected format: the stream URL and the
                # encrypted signature live in (signature)Cipher.
                cipher = url_data.get('cipher', '') + url_data.get(
                    'signatureCipher', '')
                printDBG(cipher)
                cipher = cipher.split('&')
                # FIX: initialise before the loop. Previously these were
                # only assigned when the matching key appeared in the
                # cipher, so a cipher without 's='/'sp=' raised NameError
                # below -- swallowed by the outer try, losing all formats.
                sig_item = ''
                s_item = ''
                sp_item = ''
                for item in cipher:
                    if 'url=' in item:
                        url_item = {
                            'url': _unquote(item.replace('url=', ''), None)
                        }
                    if 'sig=' in item:
                        sig_item = item.replace('sig=', '')
                    if 's=' in item:
                        s_item = item.replace('s=', '')
                    if 'sp=' in item:
                        sp_item = item.replace('sp=', '')
                # NOTE(review): 'sig' in cipher is an exact-element list
                # membership test (elements look like 'sig=...'), so this
                # branch is unlikely to ever match; kept as-is to
                # preserve behaviour.
                if 'sig' in cipher:
                    signature = sig_item
                    url_item['url'] += '&signature=' + signature
                elif len(s_item):
                    # Encrypted signature: remember it and leave a {0}
                    # placeholder, filled in after batch decryption.
                    url_item['esign'] = _unquote(s_item)
                    if len(sp_item):
                        url_item['url'] += '&%s={0}' % sp_item
                    else:
                        url_item['url'] += '&signature={0}'
            if not 'ratebypass' in url_item['url']:
                url_item['url'] += '&ratebypass=yes'
            url_map[str(url_data['itag'])] = url_item
        video_url_list = self._get_video_url_list(url_map, allowVP9)
    except Exception:
        printExc()
    if video_info.get('isLive') and not video_url_list:
        # Live stream: use the HLS manifest instead of progressive formats.
        is_m3u8 = 'yes'
        manifest_url = _unquote(
            player_response['streamingData']['hlsManifestUrl'], None)
        url_map = self._extract_from_m3u8(manifest_url, video_id)
        video_url_list = self._get_video_url_list(url_map, allowVP9)
    if not video_url_list:
        return []
    # Collect all encrypted signatures so they can be decrypted in one go.
    signItems = []
    signatures = []
    for idx in range(len(video_url_list)):
        if 'esign' in video_url_list[idx][1]:
            signItems.append(video_url_list[idx][1])
            signatures.append(video_url_list[idx][1]['esign'])
    if len(signatures):
        # decrypt signatures
        printDBG("signatures: %s" % signatures)
        playerUrl = ''
        tmp = ph.find(video_webpage, ('<script', '>', 'player/base'))[1]
        playerUrl = ph.getattr(tmp, 'src')
        if not playerUrl:
            # Fall back to several known layouts of the JS player URL.
            for reObj in [
                    '"assets"\:[^\}]+?"js"\s*:\s*"([^"]+?)"',
                    'src="([^"]+?)"[^>]+?name="player.*?/base"',
                    '"jsUrl":"([^"]+?)"'
            ]:
                playerUrl = ph.search(video_webpage, reObj)[0]
                if playerUrl:
                    break
        playerUrl = self.cm.getFullUrl(playerUrl.replace('\\', ''),
                                       self.cm.meta['url'])
        if playerUrl:
            decSignatures = CYTSignAlgoExtractor(
                self.cm).decryptSignatures(signatures, playerUrl)
            if len(signatures) == len(signItems):
                try:
                    # Fill the {0} placeholders left in the URLs above.
                    for idx in range(len(signItems)):
                        signItems[idx]['url'] = signItems[idx][
                            'url'].format(decSignatures[idx])
                except Exception:
                    printExc()
                    SetIPTVPlayerLastHostError(
                        _('Decrypt Signatures Error'))
                    return []
        else:
            # No JS player found -> encrypted URLs cannot be completed.
            return []
    if isGoogleDoc:
        cookieHeader = self.cm.getCookieHeader(COOKIE_FILE)
    sub_tracks = self._get_subtitles(video_id)
    results = []
    for format_param, url_item in video_url_list:
        # Extension
        video_extension = self._video_extensions.get(format_param, 'flv')
        video_format = self._video_dimensions.get(format_param, '???')
        video_real_url = url_item['url']
        if len(sub_tracks):
            video_real_url = strwithmeta(
                video_real_url, {'external_sub_tracks': sub_tracks})
        if isGoogleDoc:
            video_real_url = strwithmeta(video_real_url,
                                         {'Cookie': cookieHeader})
        results.append({
            'id': video_id,
            'url': video_real_url,
            'uploader': '',
            'title': '',
            'ext': video_extension,
            'format': video_format,
            'thumbnail': '',
            'duration': video_duration,
            'player_url': '',
            'm3u8': is_m3u8,
        })
    return results
def _real_extract(self, url, allowVP9=False, allowAgeGate=False):
    """
    Resolve a YouTube (or Google-Docs hosted) page URL into a list of
    playable stream descriptors, using the legacy get_video_info
    endpoint and the embedded ytplayer.config / player_response JSON.

    :param url: watch-page (or docs.google.com) URL, possibly wrapped in
        a next_url redirection (e.g. age verification).
    :param allowVP9: when True, itags 313/271 are accepted in addition
        to self._supported_formats.
    :param allowAgeGate: when True and the page reports LOGIN_REQUIRED,
        simulate embedded-player access to bypass the age gate.
    :return: list of dicts ('id', 'url', 'ext', 'format', 'duration',
        'm3u8', ...); empty list on failure.
    :raises ExtractorError: on page/video-info download failure, missing
        token, or unsupported "rental" videos.
    """
    # Extract original video URL from URL with redirection, like age
    # verification, using next_url parameter.
    mobj = re.search(self._NEXT_URL_RE, url)
    if mobj:
        # https
        url = 'http://www.youtube.com/' + compat_urllib_parse.unquote(
            mobj.group(1)).lstrip('/')
    video_id = self._extract_id(url)
    if 'yt-video-id' == video_id:
        # Google Docs hosted video: the real id is the docid parameter.
        video_id = self.cm.ph.getSearchGroups(
            url + '&', '[\?&]docid=([^\?^&]+)[\?&]')[0]
        isGoogleDoc = True
        videoKey = 'docid'
        videoInfoBase = 'https://docs.google.com/get_video_info?docid=%s' % video_id
        COOKIE_FILE = GetCookieDir('docs.google.com.cookie')
        videoInfoparams = {
            'cookiefile': COOKIE_FILE,
            'use_cookie': True,
            'load_cookie': False,
            'save_cookie': True
        }
    else:
        url = 'http://www.youtube.com/watch?v=%s&' % video_id
        isGoogleDoc = False
        videoKey = 'video_id'
        videoInfoBase = 'https://www.youtube.com/get_video_info?video_id=%s&' % video_id
        videoInfoparams = {}
    sts, video_webpage = self.cm.getPage(url)
    if not sts:
        raise ExtractorError('Unable to download video webpage')
    # Get video info
    #if re.search(r'player-age-gate-content">', video_webpage) is not None:
    if allowAgeGate and re.search(r'"LOGIN_REQUIRED"',
                                  video_webpage) is not None:
        #self.report_age_confirmation()
        age_gate = True
        # We simulate the access to the video from www.youtube.com/v/{video_id}
        # this can be viewed without login into Youtube
        data = compat_urllib_parse.urlencode({
            'el': 'embedded',
            'gl': 'US',
            'hl': 'en',
            'eurl': 'https://youtube.googleapis.com/v/' + video_id,
            'asv': 3,
            'sts': '1588',
        })
        video_info_url = videoInfoBase + data
        sts, video_info = self.cm.getPage(video_info_url, videoInfoparams)
        if not sts:
            # FIX: corrected typo in the error message ('Faile' -> 'Failed').
            raise ExtractorError('Failed to get "%s"' % video_info_url)
    else:
        age_gate = False
        # Try several page types until one returns a playback token.
        for el_type in ['&el=detailpage', '&el=embedded', '&el=vevo', '']:
            #https
            video_info_url = videoInfoBase + (
                '%s&ps=default&eurl=&gl=US&hl=en' % (el_type))
            sts, video_info = self.cm.getPage(video_info_url,
                                              videoInfoparams)
            if not sts:
                continue
            if 'channel_creation_token' in video_info or '&account_playback_token=' in video_info:
                break
        if 'channel_creation_token' not in video_info and '&account_playback_token=' not in video_info:
            raise ExtractorError('"token" parameter not in video info')
    # Check for "rental" videos
    if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
        raise ExtractorError('"rental" videos not supported')
    # Start extracting information: parse the urlencoded response into a dict.
    video_info = video_info.split('&')
    video_info2 = {}
    for item in video_info:
        item = item.split('=')
        if len(item) < 2:
            continue
        video_info2[item[0].strip()] = item[1].strip()
    video_info = video_info2
    del video_info2
    dashmpd = str(_unquote(str(video_info.get('dashmpd', '')), None))
    # Duration (seconds, as a string) if present.
    if 'length_seconds' not in video_info:
        video_duration = ''
    else:
        video_duration = video_info['length_seconds']
    if 'url_encoded_fmt_stream_map' in video_info:
        video_info['url_encoded_fmt_stream_map'] = [
            _unquote(video_info['url_encoded_fmt_stream_map'], None)
        ]
    if 'adaptive_fmts' in video_info:
        video_info['adaptive_fmts'] = [
            _unquote(video_info['adaptive_fmts'], None)
        ]
    try:
        mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
        if not mobj:
            raise ValueError('Could not find vevo ID')
        ytplayer_config = json_loads(mobj.group(1))
        args = ytplayer_config['args']
        # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
        # this signatures are encrypted
        if 'url_encoded_fmt_stream_map' not in args:
            raise ValueError('No stream_map present')  # caught below
        re_signature = re.compile(r'[&,]s=')
        m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
        if m_s is not None:
            printDBG('%s: Encrypted signatures detected.' % video_id)
            video_info['url_encoded_fmt_stream_map'] = [
                args['url_encoded_fmt_stream_map']
            ]
        m_s = re_signature.search(args.get('adaptive_fmts', ''))
    except ValueError:
        pass
    # Decide which formats to download
    is_m3u8 = 'no'
    url_map = {}
    video_url_list = {}
    if len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(
            video_info.get('adaptive_fmts', [])) >= 1:
        encoded_url_map = video_info.get('url_encoded_fmt_stream_map',
                                         [''])[0] + ',' + video_info.get(
                                             'adaptive_fmts', [''])[0]
        # NOTE(review): this aliases (not copies) self._supported_formats,
        # so extend() below permanently mutates the shared list across
        # calls; later code may rely on that mutation -- confirm before
        # changing to a copy.
        _supported_formats = self._supported_formats
        if allowVP9:
            _supported_formats.extend(['313', '271'])
        for url_data_str in encoded_url_map.split(','):
            if 'index=' in url_data_str and 'index=0-0&' in url_data_str:
                continue
            if 'itag=' in url_data_str and 'url=' in url_data_str:
                url_data_str = url_data_str.split('&')
                url_data = {}
                supported = False
                for item in url_data_str:
                    item = item.split('=')
                    if len(item) < 2:
                        continue
                    key = item[1].strip()
                    if item[0] == 'itag':
                        if key in self._supported_formats:
                            supported = True
                        else:
                            break
                    url_data[item[0]] = key
                if not supported:
                    continue
                url_item = {'url': _unquote(url_data['url'], None)}
                if 'sig' in url_data:
                    signature = url_data['sig']
                    url_item['url'] += '&signature=' + signature
                elif 's' in url_data:
                    # Encrypted signature: leave a {0} placeholder to be
                    # filled in after batch decryption below.
                    url_item['esign'] = _unquote(url_data['s'])
                    if 'sp' in url_data:
                        url_item['url'] += '&%s={0}' % url_data['sp']
                    else:
                        url_item['url'] += '&signature={0}'
                if not 'ratebypass' in url_item['url']:
                    url_item['url'] += '&ratebypass=yes'
                url_map[url_data['itag']] = url_item
        video_url_list = self._get_video_url_list(url_map, allowVP9)
    if video_info.get('hlsvp') and not video_url_list:
        # Live/HLS fallback via the hlsvp manifest.
        is_m3u8 = 'yes'
        manifest_url = _unquote(video_info['hlsvp'], None)
        url_map = self._extract_from_m3u8(manifest_url, video_id)
        video_url_list = self._get_video_url_list(url_map, allowVP9)
    if video_info.get('player_response') and not video_url_list:
        # HLS manifest embedded in the player_response JSON.
        is_m3u8 = 'yes'
        manifest_url = _unquote(video_info['player_response'], None)
        manifest = re.search('"hlsManifestUrl":"(.*?)"', manifest_url)
        if manifest:
            manifest_url = manifest.group(1)
            url_map = self._extract_from_m3u8(manifest_url, video_id)
            video_url_list = self._get_video_url_list(url_map, allowVP9)
    if video_info.get('player_response') and not video_url_list:
        # Last resort: parse streamingData formats out of player_response.
        try:
            is_m3u8 = 'no'
            url_data_str = json_loads(
                _unquote(video_info['player_response'],
                         None))['streamingData']['formats']
            try:
                url_data_str += json_loads(
                    _unquote(video_info['player_response'],
                             None))['streamingData']['adaptiveFormats']
            except Exception:
                printExc()
            for url_data in url_data_str:
                printDBG(str(url_data))
                if 'url' in url_data:
                    url_item = {'url': url_data['url']}
                else:
                    cipher = url_data.get('cipher', '') + url_data.get(
                        'signatureCipher', '')
                    printDBG(cipher)
                    cipher = cipher.split('&')
                    # FIX: initialise before the loop. Previously these
                    # were only assigned when the matching key appeared
                    # in the cipher, so a cipher without 's='/'sp='
                    # raised NameError below -- swallowed by the outer
                    # try, losing all formats.
                    sig_item = ''
                    s_item = ''
                    sp_item = ''
                    for item in cipher:
                        if 'url=' in item:
                            url_item = {
                                'url': _unquote(item.replace('url=', ''),
                                                None)
                            }
                        if 'sig=' in item:
                            sig_item = item.replace('sig=', '')
                        if 's=' in item:
                            s_item = item.replace('s=', '')
                        if 'sp=' in item:
                            sp_item = item.replace('sp=', '')
                    # NOTE(review): exact-element list membership test;
                    # kept as-is to preserve behaviour.
                    if 'sig' in cipher:
                        signature = sig_item
                        url_item['url'] += '&signature=' + signature
                    elif len(s_item):
                        url_item['esign'] = _unquote(s_item)
                        if len(sp_item):
                            url_item['url'] += '&%s={0}' % sp_item
                        else:
                            url_item['url'] += '&signature={0}'
                if not 'ratebypass' in url_item['url']:
                    url_item['url'] += '&ratebypass=yes'
                url_map[str(url_data['itag'])] = url_item
            video_url_list = self._get_video_url_list(url_map, allowVP9)
        except Exception:
            printExc()
    if not video_url_list:
        return []
    if self.cm.isValidUrl(dashmpd):
        # DASH manifest: its /s/<sig> path segment is an encrypted
        # signature, handled by the same batch decryption below.
        sign = ph.search(dashmpd, r'/s/([a-fA-F0-9\.]+)')[0]
        if sign:
            dashmpd = dashmpd.replace(sign, '{0}')
        video_url_list.append(('mpd', {'url': dashmpd}))
        if sign:
            video_url_list[-1][1]['esign'] = sign
    # Collect all encrypted signatures so they can be decrypted in one go.
    signItems = []
    signatures = []
    for idx in range(len(video_url_list)):
        if 'esign' in video_url_list[idx][1]:
            signItems.append(video_url_list[idx][1])
            signatures.append(video_url_list[idx][1]['esign'])
    if len(signatures):
        # decrypt signatures
        printDBG("signatures: %s" % signatures)
        playerUrl = ''
        tmp = ph.find(video_webpage, ('<script', '>', 'player/base'))[1]
        playerUrl = ph.getattr(tmp, 'src')
        if not playerUrl:
            for reObj in [
                    '"assets"\:[^\}]+?"js"\s*:\s*"([^"]+?)"',
                    'src="([^"]+?)"[^>]+?name="player.*?/base"'
            ]:
                playerUrl = ph.search(video_webpage, reObj)[0]
                if playerUrl:
                    break
        playerUrl = self.cm.getFullUrl(playerUrl.replace('\\', ''),
                                       self.cm.meta['url'])
        if playerUrl:
            decSignatures = CYTSignAlgoExtractor(
                self.cm).decryptSignatures(signatures, playerUrl)
            if len(signatures) == len(signItems):
                try:
                    # Fill the {0} placeholders left in the URLs above.
                    for idx in range(len(signItems)):
                        signItems[idx]['url'] = signItems[idx][
                            'url'].format(decSignatures[idx])
                except Exception:
                    printExc()
                    SetIPTVPlayerLastHostError(
                        _('Decrypt Signatures Error'))
                    return []
        else:
            # No JS player found -> encrypted URLs cannot be completed.
            return []
    if isGoogleDoc:
        cookieHeader = self.cm.getCookieHeader(COOKIE_FILE)
    sub_tracks = self._get_subtitles(video_id)
    results = []
    for format_param, url_item in video_url_list:
        # Extension
        video_extension = self._video_extensions.get(format_param, 'flv')
        video_format = self._video_dimensions.get(format_param, '???')
        video_real_url = url_item['url']
        if len(sub_tracks):
            video_real_url = strwithmeta(
                video_real_url, {'external_sub_tracks': sub_tracks})
        if isGoogleDoc:
            video_real_url = strwithmeta(video_real_url,
                                         {'Cookie': cookieHeader})
        results.append({
            'id': video_id,
            'url': video_real_url,
            'uploader': '',
            'title': '',
            'ext': video_extension,
            'format': video_format,
            'thumbnail': '',
            'duration': video_duration,
            'player_url': '',
            'm3u8': is_m3u8,
        })
    return results
def _real_extract(self, url, ):
    """
    Legacy (youtube-dl derived) extractor: resolve a YouTube watch URL
    into a list of playable stream descriptors using the old
    get_video_info endpoint and per-URL signature decryption via
    CVevoSignAlgoExtractor.

    :param url: watch-page URL, possibly wrapped in a next_url
        redirection (e.g. age verification).
    :return: list of dicts ('id', 'url', 'ext', 'format', 'duration',
        'player_url', 'm3u8', ...); empty list on failure.
    :raises ExtractorError: on download failure, missing token,
        "rental" videos, or rtmpe-only streams.
    """
    # Extract original video URL from URL with redirection, like age
    # verification, using next_url parameter.
    mobj = re.search(self._NEXT_URL_RE, url)
    if mobj:
        # https
        url = 'http://www.youtube.com/' + compat_urllib_parse.unquote(
            mobj.group(1)).lstrip('/')
    video_id = self._extract_id(url)
    # Get video webpage
    self.report_video_webpage_download(video_id)
    url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
    sts, video_webpage_bytes = self.cm.getPage(url)
    if not sts:
        raise ExtractorError('Unable to download video webpage')
    video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
    # Attempt to extract SWF player URL
    mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"',
                     video_webpage)
    if mobj is not None:
        player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
    else:
        player_url = ''
    # Get video info
    self.report_video_info_webpage_download(video_id)
    if re.search(r'player-age-gate-content">', video_webpage) is not None:
        self.report_age_confirmation()
        age_gate = True
        # We simulate the access to the video from www.youtube.com/v/{video_id}
        # this can be viewed without login into Youtube
        data = compat_urllib_parse.urlencode({
            'video_id': video_id,
            'el': 'embedded',
            'gl': 'US',
            'hl': 'en',
            'eurl': 'https://youtube.googleapis.com/v/' + video_id,
            'asv': 3,
            'sts': '1588',
        })
        video_info_url = 'https://www.youtube.com/get_video_info?' + data
        video_info = self._download_webpage(
            video_info_url, video_id, note=False,
            errnote='unable to download video info webpage')
    else:
        age_gate = False
        # Try several page types until one returns a playback token.
        for el_type in ['&el=detailpage', '&el=embedded', '&el=vevo', '']:
            #https
            video_info_url = (
                'http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                % (video_id, el_type))
            video_info = self._download_webpage(
                video_info_url, video_id, note=False,
                errnote='unable to download video info webpage')
            if '&token=' in video_info:
                break
        if '&token=' not in video_info:
            if 'reason' in video_info:
                pass  # ToDo extract reason
            raise ExtractorError('"token" parameter not in video info')
    # Check for "rental" videos
    if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
        printDBG('"rental" videos not supported')
        raise ExtractorError('"rental" videos not supported')
    # Start extracting information: parse the urlencoded response into a dict.
    self.report_information_extraction(video_id)
    video_info = video_info.split('&')
    video_info2 = {}
    for item in video_info:
        item = item.split('=')
        if len(item) < 2:
            continue
        video_info2[item[0].strip()] = item[1].strip()
    video_info = video_info2
    del video_info2
    # Duration (seconds, as a string) if present.
    if 'length_seconds' not in video_info:
        video_duration = ''
    else:
        video_duration = video_info['length_seconds']
    if 'url_encoded_fmt_stream_map' in video_info:
        video_info['url_encoded_fmt_stream_map'] = [
            _unquote(video_info['url_encoded_fmt_stream_map'])
        ]
    if 'adaptive_fmts' in video_info:
        video_info['adaptive_fmts'] = [_unquote(video_info['adaptive_fmts'])]
    # Decide which formats to download
    try:
        mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
        if not mobj:
            raise ValueError('Could not find vevo ID')
        ytplayer_config = json.loads(mobj.group(1))
        args = ytplayer_config['args']
        # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
        # this signatures are encrypted
        if 'url_encoded_fmt_stream_map' not in args:
            raise ValueError(u'No stream_map present')  # caught below
        re_signature = re.compile(r'[&,]s=')
        m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
        if m_s is not None:
            self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
            video_info['url_encoded_fmt_stream_map'] = [
                args['url_encoded_fmt_stream_map']
            ]
        m_s = re_signature.search(args.get('adaptive_fmts', ''))
    except ValueError:
        pass
    is_m3u8 = 'no'
    url_map = {}
    if len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(
            video_info.get('adaptive_fmts', [])) >= 1:
        encoded_url_map = video_info.get(
            'url_encoded_fmt_stream_map',
            [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
        if 'rtmpe%3Dyes' in encoded_url_map:
            printDBG('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.')
            # FIX: a bare `raise` with no active exception raised
            # "RuntimeError: No active exception to re-raise"; raise the
            # extractor's own error type like every other failure path.
            raise ExtractorError('rtmpe downloads are not supported')
        for url_data_str in encoded_url_map.split(','):
            add = True
            if 'itag=' in url_data_str and 'url=' in url_data_str:
                url_data_str = url_data_str.split('&')
                url_data = {}
                supported = False
                for item in url_data_str:
                    item = item.split('=')
                    if len(item) < 2:
                        continue
                    key = item[1].strip()
                    if item[0] == 'itag':
                        if key in self._supported_formats:
                            supported = True
                        else:
                            break
                    url_data[item[0]] = key
                if not supported:
                    continue
                url = _unquote(url_data['url'])
                if 'sig' in url_data:
                    signature = url_data['sig']
                    url += '&signature=' + signature
                elif 's' in url_data:
                    # Encrypted signature: decrypt per-URL via the JS
                    # (or legacy html5player) script.
                    encrypted_sig = url_data['s']
                    signature = ''
                    match = re.search('"([^"]+?html5player-[^"]+?\.js)"',
                                      video_webpage)
                    if None == match:
                        match = re.search(
                            '"([^"]+?(?:www|player)-([^/]+)/base\.js)"',
                            video_webpage)
                    if match:
                        playerUrl = match.group(1).replace('\\', '').replace(
                            'https:', 'http:')
                        if not playerUrl.startswith('http'):
                            playerUrl = 'http:' + playerUrl
                        # Lazily created, module-level shared decryptor.
                        global SignAlgoExtractorObj
                        if None == SignAlgoExtractorObj:
                            SignAlgoExtractorObj = CVevoSignAlgoExtractor()
                        signature, eType = SignAlgoExtractorObj.decryptSignature(
                            encrypted_sig, playerUrl)
                        if '' == signature and eType == 'own':
                            # Own algorithm failed -- retry with the
                            # youtube_dl-derived one.
                            signature, eType = SignAlgoExtractorObj.decryptSignature(
                                encrypted_sig, playerUrl, 'youtube_dl')
                    else:
                        printDBG("YT HTML PLAYER not available!")
                    if 0 == len(signature):
                        printDBG("YT signature description problem")
                        add = False
                    url += '&signature=' + signature
                if not 'ratebypass' in url:
                    url += '&ratebypass=yes'
                if add:
                    url_map[url_data['itag']] = url
    video_url_list = self._get_video_url_list(url_map)
    if video_info.get('hlsvp') and not video_url_list:
        # Live/HLS fallback via the hlsvp manifest.
        is_m3u8 = 'yes'
        manifest_url = _unquote(video_info['hlsvp'])
        url_map = self._extract_from_m3u8(manifest_url, video_id)
        video_url_list = self._get_video_url_list(url_map)
    if not video_url_list:
        return []
    sub_tracks = self._get_subtitles(video_id)
    results = []
    for format_param, video_real_url in video_url_list:
        # Extension
        video_extension = self._video_extensions.get(format_param, 'flv')
        video_format = self._video_dimensions.get(format_param, '???')
        video_real_url = video_real_url.encode('utf-8')
        if len(sub_tracks):
            video_real_url = strwithmeta(
                video_real_url, {'external_sub_tracks': sub_tracks})
        results.append({
            'id': video_id.encode('utf-8'),
            'url': video_real_url,
            'uploader': '',
            'title': '',
            'ext': video_extension.encode('utf-8'),
            'format': video_format.encode('utf-8'),
            'thumbnail': '',
            'duration': video_duration.encode('utf-8'),
            'player_url': player_url.encode('utf-8'),
            'm3u8': is_m3u8.encode('utf-8'),
        })
    return results