def scrapeWebPageForVideoLinks(data): logger.info("") links = {} fmt_value = { 5: "240p h263 flv", 6: "240p h263 flv", 18: "360p h264 mp4", 22: "720p h264 mp4", 26: "???", 33: "???", 34: "360p h264 flv", 35: "480p h264 flv", 36: "3gpp", 37: "1080p h264 mp4", 38: "4K h264 mp4", 43: "360p vp8 webm", 44: "480p vp8 webm", 45: "720p vp8 webm", 46: "1080p vp8 webm", 59: "480p h264 mp4", 78: "480p h264 mp4", 82: "360p h264 3D", 83: "480p h264 3D", 84: "720p h264 3D", 85: "1080p h264 3D", 100: "360p vp8 3D", 101: "480p vp8 3D", 102: "720p vp8 3D" } video_urls=[] flashvars = extractFlashVars(data) if not flashvars.has_key(u"url_encoded_fmt_stream_map"): return links if flashvars.has_key(u"ttsurl"): logger.info("ttsurl="+flashvars[u"ttsurl"]) if flashvars.has_key('hlsvp'): url = flashvars[u"hlsvp"] video_urls.append( [ "(LIVE .m3u8) [youtube]" , url ]) return video_urls js_signature = "" data_flashvars = flashvars[u"url_encoded_fmt_stream_map"].split(u",") for url_desc in data_flashvars: url_desc_map = cgi.parse_qs(url_desc) logger.info(u"url_map: " + repr(url_desc_map)) if not (url_desc_map.has_key(u"url") or url_desc_map.has_key(u"stream")): continue try: key = int(url_desc_map[u"itag"][0]) if not fmt_value.get(key): continue url = u"" if url_desc_map.has_key(u"url"): url = urllib.unquote(url_desc_map[u"url"][0]) elif url_desc_map.has_key(u"conn") and url_desc_map.has_key(u"stream"): url = urllib.unquote(url_desc_map[u"conn"][0]) if url.rfind("/") < len(url) -1: url = url + "/" url = url + urllib.unquote(url_desc_map[u"stream"][0]) elif url_desc_map.has_key(u"stream") and not url_desc_map.has_key(u"conn"): url = urllib.unquote(url_desc_map[u"stream"][0]) if url_desc_map.has_key(u"sig"): url = url + u"&signature=" + url_desc_map[u"sig"][0] elif url_desc_map.has_key(u"s"): sig = url_desc_map[u"s"][0] if not js_signature: urljs = scrapertools.find_single_match(data, '"assets":.*?"js":\s*"([^"]+)"') urljs = urljs.replace("\\", "") if urljs: data_js = scrapertools.downloadpage("http:"+urljs) from jsinterpreter import JSInterpreter funcname = scrapertools.find_single_match(data_js, '\.sig\|\|([A-z0-9$]+)\(') jsi = JSInterpreter(data_js) js_signature = jsi.extract_function(funcname) signature = js_signature([sig]) url += u"&signature=" + signature # Se encodean las comas para que no falle en método built-in url = url.replace(",", "%2C") video_urls.append( [ "("+fmt_value[key]+") [youtube]" , url ]) except: import traceback logger.info(traceback.format_exc()) return video_urls
def extract_videos(video_id): fmt_value = { 5: "240p h263 flv", 6: "240p h263 flv", 18: "360p h264 mp4", 22: "720p h264 mp4", 26: "???", 33: "???", 34: "360p h264 flv", 35: "480p h264 flv", 36: "3gpp", 37: "1080p h264 mp4", 38: "4K h264 mp4", 43: "360p vp8 webm", 44: "480p vp8 webm", 45: "720p vp8 webm", 46: "1080p vp8 webm", 59: "480p h264 mp4", 78: "480p h264 mp4", 82: "360p h264 3D", 83: "480p h264 3D", 84: "720p h264 3D", 85: "1080p h264 3D", 100: "360p vp8 3D", 101: "480p vp8 3D", 102: "720p vp8 3D" } url = 'http://www.youtube.com/get_video_info?video_id=%s&eurl=https://youtube.googleapis.com/v/%s&ssl_stream=1' % \ (video_id, video_id) data = httptools.downloadpage(url).data video_urls = [] params = dict(urlparse.parse_qsl(data)) if params.get('hlsvp'): video_urls.append(["(LIVE .m3u8) [youtube]", params['hlsvp']]) return video_urls if config.is_xbmc(): import xbmc xbmc_version = int( xbmc.getInfoLabel("System.BuildVersion").split(".", 1)[0]) if xbmc_version > 16 and xbmc.getCondVisibility('System.HasAddon(inputstream.adaptive)') \ and params.get('dashmpd'): if params.get('use_cipher_signature', '') != 'True': video_urls.append( ['mpd HD [youtube]', params['dashmpd'], 0, '', True]) js_signature = "" youtube_page_data = httptools.downloadpage( "http://www.youtube.com/watch?v=%s" % video_id).data params = extract_flashvars(youtube_page_data) if params.get('url_encoded_fmt_stream_map'): data_flashvars = params["url_encoded_fmt_stream_map"].split(",") for url_desc in data_flashvars: url_desc_map = dict(urlparse.parse_qsl(url_desc)) if not url_desc_map.get("url") and not url_desc_map.get("stream"): continue try: key = int(url_desc_map["itag"]) if not fmt_value.get(key): continue if url_desc_map.get("url"): url = urllib.unquote(url_desc_map["url"]) elif url_desc_map.get("conn") and url_desc_map.get("stream"): url = urllib.unquote(url_desc_map["conn"]) if url.rfind("/") < len(url) - 1: url += "/" url += urllib.unquote(url_desc_map["stream"]) elif url_desc_map.get( "stream") and not url_desc_map.get("conn"): url = urllib.unquote(url_desc_map["stream"]) if url_desc_map.get("sig"): url += "&signature=" + url_desc_map["sig"] elif url_desc_map.get("s"): sig = url_desc_map["s"] if not js_signature: urljs = scrapertools.find_single_match( youtube_page_data, '"assets":.*?"js":\s*"([^"]+)"') urljs = urljs.replace("\\", "") if urljs: if not re.search(r'https?://', urljs): urljs = urlparse.urljoin( "https://www.youtube.com", urljs) data_js = httptools.downloadpage(urljs).data from jsinterpreter import JSInterpreter funcname = scrapertools.find_single_match( data_js, '\.sig\|\|([A-z0-9$]+)\(') if not funcname: funcname = scrapertools.find_single_match( data_js, '["\']signature["\']\s*,\s*' '([A-z0-9$]+)\(') jsi = JSInterpreter(data_js) js_signature = jsi.extract_function(funcname) signature = js_signature([sig]) url += "&signature=" + signature url = url.replace(",", "%2C") video_urls.append(["(" + fmt_value[key] + ") [youtube]", url]) except: import traceback logger.info(traceback.format_exc()) return video_urls
def get_video_url(item): logger.trace() itemlist = [] video_id = scrapertools.find_single_match(item.url, 'v=([A-z0-9_-]{11})') url = 'http://www.youtube.com/get_video_info?video_id=%s&eurl=https://youtube.googleapis.com/v/%s&ssl_stream=1' % ( video_id, video_id) data = httptools.downloadpage(url).data params = dict(urlparse.parse_qsl(data)) if params.get('hlsvp'): itemlist.append(Video(type='Live', url=params['hlsvp'])) return itemlist if params.get('dashmpd') and params.get('use_cipher_signature', '') != 'True': itemlist.append(Video(type='MPD', url=params['dashmpd'], mpd=True)) js_signature = "" youtube_page_data = httptools.downloadpage( "http://www.youtube.com/watch?v=%s" % video_id).data params = extract_flashvars(youtube_page_data) if params.get('url_encoded_fmt_stream_map'): data_flashvars = params["url_encoded_fmt_stream_map"].split(",") for url_desc in data_flashvars: url_desc_map = dict(urlparse.parse_qsl(url_desc)) if not url_desc_map.get("url") and not url_desc_map.get("stream"): continue try: key = int(url_desc_map["itag"]) if not fmt_value.get(key): continue if url_desc_map.get("url"): url = urllib.unquote(url_desc_map["url"]) elif url_desc_map.get("conn") and url_desc_map.get("stream"): url = urllib.unquote(url_desc_map["conn"]) if url.rfind("/") < len(url) - 1: url += "/" url += urllib.unquote(url_desc_map["stream"]) elif url_desc_map.get( "stream") and not url_desc_map.get("conn"): url = urllib.unquote(url_desc_map["stream"]) if url_desc_map.get("sig"): url += "&signature=" + url_desc_map["sig"] elif url_desc_map.get("s"): sig = url_desc_map["s"] if not js_signature: urljs = scrapertools.find_single_match( youtube_page_data, '"assets":.*?"js":\s*"([^"]+)"') urljs = urljs.replace("\\", "") if urljs: if not re.search(r'https?://', urljs): urljs = urlparse.urljoin( "https://www.youtube.com", urljs) data_js = httptools.downloadpage(urljs).data from jsinterpreter import JSInterpreter funcname = scrapertools.find_single_match( data_js, '\.sig\|\|([A-z0-9$]+)\(') if not funcname: funcname = scrapertools.find_single_match( data_js, '["\']signature["\']\s*,\s*' '([A-z0-9$]+)\(') jsi = JSInterpreter(data_js) js_signature = jsi.extract_function(funcname) signature = js_signature([sig]) url += "&signature=" + signature url = url.replace(",", "%2C") itemlist.append( Video(type=fmt_value[key][1], res=fmt_value[key][0], url=url)) except: logger.error() return itemlist