示例#1
0
def scrapeWebPageForVideoLinks(data):
    logger.info("")
    links = {}

    fmt_value = {
        5: "240p h263 flv",
        6: "240p h263 flv",
        18: "360p h264 mp4",
        22: "720p h264 mp4",
        26: "???",
        33: "???",
        34: "360p h264 flv",
        35: "480p h264 flv",
        36: "3gpp",
        37: "1080p h264 mp4",
        38: "4K h264 mp4",
        43: "360p vp8 webm",
        44: "480p vp8 webm",
        45: "720p vp8 webm",
        46: "1080p vp8 webm",
        59: "480p h264 mp4",
        78: "480p h264 mp4",
        82: "360p h264 3D",
        83: "480p h264 3D",
        84: "720p h264 3D",
        85: "1080p h264 3D",
        100: "360p vp8 3D",
        101: "480p vp8 3D",
        102: "720p vp8 3D"
    }

    video_urls=[]

    flashvars = extractFlashVars(data)
    if not flashvars.has_key(u"url_encoded_fmt_stream_map"):
        return links

    if flashvars.has_key(u"ttsurl"):
        logger.info("ttsurl="+flashvars[u"ttsurl"])

    if flashvars.has_key('hlsvp'):
        url = flashvars[u"hlsvp"]
        video_urls.append( [ "(LIVE .m3u8) [youtube]" , url ])
        return video_urls
    
    js_signature = ""
    data_flashvars = flashvars[u"url_encoded_fmt_stream_map"].split(u",")
    for url_desc in data_flashvars:
        url_desc_map = cgi.parse_qs(url_desc)
        logger.info(u"url_map: " + repr(url_desc_map))
        if not (url_desc_map.has_key(u"url") or url_desc_map.has_key(u"stream")):
            continue

        try:
            key = int(url_desc_map[u"itag"][0])
            if not fmt_value.get(key):
                continue
            url = u""
            if url_desc_map.has_key(u"url"):
                url = urllib.unquote(url_desc_map[u"url"][0])
            elif url_desc_map.has_key(u"conn") and url_desc_map.has_key(u"stream"):
                url = urllib.unquote(url_desc_map[u"conn"][0])
                if url.rfind("/") < len(url) -1:
                    url = url + "/"
                url = url + urllib.unquote(url_desc_map[u"stream"][0])
            elif url_desc_map.has_key(u"stream") and not url_desc_map.has_key(u"conn"):
                url = urllib.unquote(url_desc_map[u"stream"][0])

            if url_desc_map.has_key(u"sig"):
                url = url + u"&signature=" + url_desc_map[u"sig"][0]
            elif url_desc_map.has_key(u"s"):
                sig = url_desc_map[u"s"][0]
                if not js_signature:
                    urljs = scrapertools.find_single_match(data, '"assets":.*?"js":\s*"([^"]+)"')
                    urljs = urljs.replace("\\", "")
                    if urljs:
                        data_js = scrapertools.downloadpage("http:"+urljs)
                        from jsinterpreter import JSInterpreter
                        funcname = scrapertools.find_single_match(data_js, '\.sig\|\|([A-z0-9$]+)\(')

                        jsi = JSInterpreter(data_js)
                        js_signature = jsi.extract_function(funcname)

                signature = js_signature([sig])
                url += u"&signature=" + signature

            # Se encodean las comas para que no falle en método built-in
            url = url.replace(",", "%2C")
            video_urls.append( [ "("+fmt_value[key]+") [youtube]" , url ])
        except:
            import traceback
            logger.info(traceback.format_exc())

    return video_urls
示例#2
0
def extract_videos(video_id):
    fmt_value = {
        5: "240p h263 flv",
        6: "240p h263 flv",
        18: "360p h264 mp4",
        22: "720p h264 mp4",
        26: "???",
        33: "???",
        34: "360p h264 flv",
        35: "480p h264 flv",
        36: "3gpp",
        37: "1080p h264 mp4",
        38: "4K h264 mp4",
        43: "360p vp8 webm",
        44: "480p vp8 webm",
        45: "720p vp8 webm",
        46: "1080p vp8 webm",
        59: "480p h264 mp4",
        78: "480p h264 mp4",
        82: "360p h264 3D",
        83: "480p h264 3D",
        84: "720p h264 3D",
        85: "1080p h264 3D",
        100: "360p vp8 3D",
        101: "480p vp8 3D",
        102: "720p vp8 3D"
    }

    url = 'http://www.youtube.com/get_video_info?video_id=%s&eurl=https://youtube.googleapis.com/v/%s&ssl_stream=1' % \
          (video_id, video_id)
    data = httptools.downloadpage(url).data

    video_urls = []
    params = dict(urlparse.parse_qsl(data))
    if params.get('hlsvp'):
        video_urls.append(["(LIVE .m3u8) [youtube]", params['hlsvp']])
        return video_urls

    if config.is_xbmc():
        import xbmc
        xbmc_version = int(
            xbmc.getInfoLabel("System.BuildVersion").split(".", 1)[0])
        if xbmc_version > 16 and xbmc.getCondVisibility('System.HasAddon(inputstream.adaptive)') \
                             and params.get('dashmpd'):
            if params.get('use_cipher_signature', '') != 'True':
                video_urls.append(
                    ['mpd  HD [youtube]', params['dashmpd'], 0, '', True])

    js_signature = ""
    youtube_page_data = httptools.downloadpage(
        "http://www.youtube.com/watch?v=%s" % video_id).data
    params = extract_flashvars(youtube_page_data)
    if params.get('url_encoded_fmt_stream_map'):
        data_flashvars = params["url_encoded_fmt_stream_map"].split(",")
        for url_desc in data_flashvars:
            url_desc_map = dict(urlparse.parse_qsl(url_desc))
            if not url_desc_map.get("url") and not url_desc_map.get("stream"):
                continue

            try:
                key = int(url_desc_map["itag"])
                if not fmt_value.get(key):
                    continue

                if url_desc_map.get("url"):
                    url = urllib.unquote(url_desc_map["url"])
                elif url_desc_map.get("conn") and url_desc_map.get("stream"):
                    url = urllib.unquote(url_desc_map["conn"])
                    if url.rfind("/") < len(url) - 1:
                        url += "/"
                    url += urllib.unquote(url_desc_map["stream"])
                elif url_desc_map.get(
                        "stream") and not url_desc_map.get("conn"):
                    url = urllib.unquote(url_desc_map["stream"])

                if url_desc_map.get("sig"):
                    url += "&signature=" + url_desc_map["sig"]
                elif url_desc_map.get("s"):
                    sig = url_desc_map["s"]
                    if not js_signature:
                        urljs = scrapertools.find_single_match(
                            youtube_page_data, '"assets":.*?"js":\s*"([^"]+)"')
                        urljs = urljs.replace("\\", "")
                        if urljs:
                            if not re.search(r'https?://', urljs):
                                urljs = urlparse.urljoin(
                                    "https://www.youtube.com", urljs)
                            data_js = httptools.downloadpage(urljs).data
                            from jsinterpreter import JSInterpreter
                            funcname = scrapertools.find_single_match(
                                data_js, '\.sig\|\|([A-z0-9$]+)\(')
                            if not funcname:
                                funcname = scrapertools.find_single_match(
                                    data_js, '["\']signature["\']\s*,\s*'
                                    '([A-z0-9$]+)\(')
                            jsi = JSInterpreter(data_js)
                            js_signature = jsi.extract_function(funcname)

                    signature = js_signature([sig])
                    url += "&signature=" + signature
                url = url.replace(",", "%2C")
                video_urls.append(["(" + fmt_value[key] + ") [youtube]", url])
            except:
                import traceback
                logger.info(traceback.format_exc())

    return video_urls
示例#3
0
def get_video_url(item):
    logger.trace()
    itemlist = []

    video_id = scrapertools.find_single_match(item.url, 'v=([A-z0-9_-]{11})')

    url = 'http://www.youtube.com/get_video_info?video_id=%s&eurl=https://youtube.googleapis.com/v/%s&ssl_stream=1' % (
        video_id, video_id)
    data = httptools.downloadpage(url).data
    params = dict(urlparse.parse_qsl(data))

    if params.get('hlsvp'):
        itemlist.append(Video(type='Live', url=params['hlsvp']))
        return itemlist

    if params.get('dashmpd') and params.get('use_cipher_signature',
                                            '') != 'True':
        itemlist.append(Video(type='MPD', url=params['dashmpd'], mpd=True))

    js_signature = ""
    youtube_page_data = httptools.downloadpage(
        "http://www.youtube.com/watch?v=%s" % video_id).data
    params = extract_flashvars(youtube_page_data)
    if params.get('url_encoded_fmt_stream_map'):
        data_flashvars = params["url_encoded_fmt_stream_map"].split(",")
        for url_desc in data_flashvars:
            url_desc_map = dict(urlparse.parse_qsl(url_desc))
            if not url_desc_map.get("url") and not url_desc_map.get("stream"):
                continue

            try:
                key = int(url_desc_map["itag"])
                if not fmt_value.get(key):
                    continue

                if url_desc_map.get("url"):
                    url = urllib.unquote(url_desc_map["url"])
                elif url_desc_map.get("conn") and url_desc_map.get("stream"):
                    url = urllib.unquote(url_desc_map["conn"])
                    if url.rfind("/") < len(url) - 1:
                        url += "/"
                    url += urllib.unquote(url_desc_map["stream"])
                elif url_desc_map.get(
                        "stream") and not url_desc_map.get("conn"):
                    url = urllib.unquote(url_desc_map["stream"])

                if url_desc_map.get("sig"):
                    url += "&signature=" + url_desc_map["sig"]
                elif url_desc_map.get("s"):
                    sig = url_desc_map["s"]
                    if not js_signature:
                        urljs = scrapertools.find_single_match(
                            youtube_page_data, '"assets":.*?"js":\s*"([^"]+)"')
                        urljs = urljs.replace("\\", "")
                        if urljs:
                            if not re.search(r'https?://', urljs):
                                urljs = urlparse.urljoin(
                                    "https://www.youtube.com", urljs)
                            data_js = httptools.downloadpage(urljs).data
                            from jsinterpreter import JSInterpreter
                            funcname = scrapertools.find_single_match(
                                data_js, '\.sig\|\|([A-z0-9$]+)\(')
                            if not funcname:
                                funcname = scrapertools.find_single_match(
                                    data_js, '["\']signature["\']\s*,\s*'
                                    '([A-z0-9$]+)\(')
                            jsi = JSInterpreter(data_js)
                            js_signature = jsi.extract_function(funcname)

                    signature = js_signature([sig])
                    url += "&signature=" + signature
                url = url.replace(",", "%2C")
                itemlist.append(
                    Video(type=fmt_value[key][1],
                          res=fmt_value[key][0],
                          url=url))
            except:
                logger.error()

    return itemlist