def _parse_sig_js(jscode):
    """Build a callable that applies the player's signature function.

    The name of the function is taken from the first capture group of the
    ``.sig||NAME(`` pattern, looked up in *jscode* through ``_search_regex``.
    A ``JSInterpreter`` created from *jscode* then extracts that function.

    Returns a one-argument callable; its argument ``s`` is handed to the
    extracted function wrapped in a single-element list.
    """
    sig_func_name = _search_regex(
        r'\.sig\|\|([a-zA-Z0-9$]+)\(',
        jscode,
        'Initial JS player signature function name',
    )
    extracted = JSInterpreter(jscode).extract_function(sig_func_name)

    def apply_signature(s):
        # The extracted function takes its arguments as a list.
        return extracted([s])

    return apply_signature
	def _parse_sig_js(self, jscode):
		"""Build a callable that applies the player's signature function.

		``self._search_regex`` is given two candidate patterns, *jscode* and
		``group='sig'``; whatever it returns is used as the function name
		that a ``JSInterpreter`` built from *jscode* is asked to extract.

		Returns a one-argument callable that passes its argument to the
		extracted function inside a single-element list.
		"""
		name_patterns = (
			r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
			r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
		)
		sig_func_name = self._search_regex(name_patterns, jscode, group='sig')
		extracted = JSInterpreter(jscode).extract_function(sig_func_name)

		def apply_signature(s):
			# The extracted function takes its arguments as a list.
			return extracted([s])

		return apply_signature
示例#3
0
	def _parse_sig_js(self, jscode):
		"""Return a wrapper around the signature function found in *jscode*.

		The function name is whatever ``self._search_regex`` returns for the
		``sig`` group of the two patterns below. The function itself is
		obtained from ``JSInterpreter(jscode).extract_function``.
		"""
		# Pattern 1: a quoted "signature" literal followed by ", NAME(".
		quoted_signature_re = r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\('
		# Pattern 2: ".sig||NAME(".
		sig_or_call_re = r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\('

		target = self._search_regex(
			(quoted_signature_re, sig_or_call_re), jscode, group='sig')
		interpreter = JSInterpreter(jscode)
		js_function = interpreter.extract_function(target)
		# Callers supply one value; the JS function expects an argument list.
		return lambda s: js_function([s])
示例#4
0
def _get_mainfunc_from_js(js):
    """Extract the main signature function from player JavaScript.

    Searches *js* for ``.sig||NAME(`` and returns the result of
    ``JSInterpreter(js).extract_function(NAME)``.

    The search result is used without a ``None`` check, so an
    ``AttributeError`` is raised when the pattern is absent from *js*.
    """
    dbg("Scanning js for main function.")
    main_name = re.search(r'\.sig\|\|([a-zA-Z0-9$]+)\(', js).group(1)
    dbg("Found main function: %s", main_name)
    return JSInterpreter(js).extract_function(main_name)
	def _parse_sig_js(self, jscode):
		"""Build a callable that applies the player's signature function.

		Five candidate patterns, each exposing a ``sig`` named group, are
		handed to ``self._search_regex`` together with *jscode*. The value it
		returns names the function that ``JSInterpreter`` extracts.

		Returns a one-argument callable that forwards its argument to the
		extracted function as a single-element list.
		"""
		name_patterns = (
			r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
			r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
			r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?(?P<sig>[a-zA-Z0-9$]+)\(',
			r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
			r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
		)
		sig_func_name = self._search_regex(name_patterns, jscode, group='sig')
		extracted = JSInterpreter(jscode).extract_function(sig_func_name)

		def apply_signature(s):
			# The extracted function takes its arguments as a list.
			return extracted([s])

		return apply_signature
 def _parse_sig_js(self, jscode):
     """Return a wrapper around the signature function found in *jscode*.

     ``self._search_regex`` receives the candidate patterns below (all of
     which define a ``sig`` named group), *jscode* and ``group='sig'``.
     Its result is used as the name passed to
     ``JSInterpreter(jscode).extract_function``.
     """
     candidates = (
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
     )
     target = self._search_regex(candidates, jscode, group='sig')
     js_function = JSInterpreter(jscode).extract_function(target)

     def run(s):
         # Wrap the single value in the argument list the function expects.
         return js_function([s])

     return run
示例#7
0
def retrieveSignatureDecryptorFunction(ytplayerConfigJson):
    """Return the signature-decryption function for the JS player asset.

    The result is stored in the module-level ``DECRYPTOR_FUNCTION_CACHE``
    and returned directly on later calls.

    On a cache miss the function reads ``ytplayerConfigJson['assets']['js']``,
    downloads that path from ``https://www.youtube.com`` with ``requests``,
    searches the downloaded text for the decryption function's name, and has
    ``JSInterpreter`` extract it. Progress messages are printed along the way.

    Calls ``exit(1)`` when no pattern matches, or when a ``KeyError`` is
    raised anywhere inside the ``try`` block (reported as a missing ``js``
    asset). A missing ``'assets'`` key is looked up outside the ``try`` and
    therefore propagates as a plain ``KeyError``.
    """
    global DECRYPTOR_FUNCTION_CACHE
    if DECRYPTOR_FUNCTION_CACHE is not None:
        return DECRYPTOR_FUNCTION_CACHE

    assets = ytplayerConfigJson['assets']
    try:
        print(
            "Video seems to use signature protection...\nAttempting extraction of decryptor from video player asset...\n\n"
        )
        # This key may not exist if the video uses the swf player.
        player_path = assets['js']
        print(
            "JSPlayer identified as the video player asset. Downloading JSPlayer javascript file...\n\n"
        )
        player_source = requests.get(
            "https://www.youtube.com" + player_path).text
        print("Asset downloaded. Extracting decryption function...\n\n")

        # Per the original author's note, these patterns come from
        # youtube-dl's youtube.py; at least one is expected to match.
        name_patterns = (
            r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
            r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
            r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?(?P<sig>[a-zA-Z0-9$]+)\(',
            r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
            r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        )
        # Lazily try each pattern in order and stop at the first match.
        attempts = (re.search(pattern, player_source)
                    for pattern in name_patterns)
        first_hit = next((hit for hit in attempts if hit is not None), None)

        sig_func_name = None
        if first_hit is not None:
            sig_func_name = first_hit.groupdict()['sig']
        if sig_func_name is None:
            print(
                "Cannot find signature-decryption function in JSPlayer asset!")
            exit(1)

        # extract_function returns an invokable decryption function.
        decryptor = JSInterpreter(player_source).extract_function(
            sig_func_name)
        print(
            "Signature-decryption function found. Deciphering encrypted signature...\n\n"
        )
        DECRYPTOR_FUNCTION_CACHE = decryptor
        return decryptor

    except KeyError:
        print("ytplayer.config.assets.js doesn't exist.\nProbably swf player?")
        exit(1)
示例#8
0
	def _parse_sig_js(self, jscode):
		"""Build a callable that applies the player's signature function.

		``self._search_regex`` is given the candidate patterns below (each
		defines a ``sig`` named group), *jscode* and ``group='sig'``. Its
		result names the function that ``JSInterpreter(jscode)`` extracts.

		The original tuple listed its final pattern twice in a row; the
		repeated entry could never match where the first copy had not, so it
		is listed once here.

		Returns a one-argument callable that forwards its argument to the
		extracted function as a single-element list.
		"""
		funcname = self._search_regex(
				(r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
				r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
				r'\b(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
				r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
				# Obsolete patterns
				r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
				r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
				r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
				r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
				r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
				r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
				r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
				jscode, group='sig')
		jsi = JSInterpreter(jscode)
		initial_function = jsi.extract_function(funcname)
		# The extracted function takes its arguments as a list.
		return lambda s: initial_function([s])
 def _parse_sig_js(self, jscode):
     """Return a wrapper around the signature function found in *jscode*.

     The function name is whatever ``self._search_regex`` returns for the
     ``.sig||NAME(`` pattern; ``JSInterpreter(jscode)`` then extracts it.
     The returned callable takes one value and passes it on as a
     single-element list.
     """
     sig_name_pattern = r'\.sig\|\|([a-zA-Z0-9$]+)\('
     target = self._search_regex(sig_name_pattern, jscode)
     js_function = JSInterpreter(jscode).extract_function(target)

     def run(s):
         # Wrap the single value in the argument list the function expects.
         return js_function([s])

     return run
示例#10
0
def getCipher(session, player, funcname='FK'):
    """Download the player JavaScript and extract one function from it.

    Args:
        session: object whose ``get(url)`` returns a response exposing the
            body as ``.text`` (presumably a ``requests.Session`` - confirm
            against the caller).
        player: mapping; the script path is read from
            ``player["assets"]["js"]`` and appended to the module-level
            ``youtubeUrl``.
        funcname: name of the JavaScript function to extract. Defaults to
            ``'FK'``, the value that was previously hard-coded, so existing
            two-argument calls behave exactly as before.

    Returns:
        Whatever ``JSInterpreter(...).extract_function(funcname)`` returns.
    """
    # NOTE(review): if "assets" or "js" is missing, .get() yields None and
    # the concatenation below fails; the original behaved the same way.
    js_path = player.get("assets", {}).get("js")
    playerJS = session.get(youtubeUrl + js_path).text
    jsi = JSInterpreter(playerJS)
    return jsi.extract_function(funcname)
	def _parse_sig_js(self, jscode):
		"""Build a callable that applies the player's signature function.

		``self._search_regex`` looks up the ``.sig||NAME(`` pattern in
		*jscode*; its result names the function that ``JSInterpreter``
		extracts. The returned callable forwards its single argument to that
		function as a one-element list.
		"""
		sig_func_name = self._search_regex(
			r'\.sig\|\|([a-zA-Z0-9$]+)\(',
			jscode,
		)
		extracted = JSInterpreter(jscode).extract_function(sig_func_name)

		def apply_signature(s):
			# The extracted function takes its arguments as a list.
			return extracted([s])

		return apply_signature
示例#12
0
def scrapeWebPageForVideoLinks(data):
    """Collect stream URLs described by the page's flash variables.

    The flash variables are obtained with ``extractFlashVars(data)``. Each
    comma-separated entry of ``url_encoded_fmt_stream_map`` is parsed as a
    query string and turned into a URL, with a ``&signature=`` parameter
    appended when the entry carries ``sig`` (used as is) or ``s`` (first run
    through a JavaScript function extracted from the player script).

    Returns:
        A list of ``[label, url]`` pairs, where ``label`` is built from
        ``fmt_value`` for the entry's itag. When the stream map key is
        missing, the empty dict ``links`` is returned instead.

    Any exception raised while processing one entry is logged with its
    traceback and that entry is skipped.
    """
    logger.info(data)
    # NOTE(review): only ever returned (still empty) by the early exit below,
    # so the function returns a dict there and a list everywhere else.
    links = {}

    # Human-readable description for each itag number; used to label results.
    fmt_value = {
        5: "240p h263 flv",
        18: "360p h264 mp4",
        22: "720p h264 mp4",
        26: "???",
        33: "???",
        34: "360p h264 flv",
        35: "480p h264 flv",
        37: "1080p h264 mp4",
        36: "3gpp",
        38: "720p vp8 webm",
        43: "360p h264 flv",
        44: "480p vp8 webm",
        45: "720p vp8 webm",
        46: "520p vp8 webm",
        59: "480 for rtmpe",
        78: "400 for rtmpe",
        82: "360p h264 stereo",
        83: "240p h264 stereo",
        84: "720p h264 stereo",
        85: "520p h264 stereo",
        100: "360p vp8 webm stereo",
        101: "480p vp8 webm stereo",
        102: "720p vp8 webm stereo",
        120: "hd720",
        121: "hd1080"
    }

    video_urls = []

    # NOTE(review): has_key() is the Python 2 dict API; presumably flashvars
    # is a plain dict - confirm against extractFlashVars.
    flashvars = extractFlashVars(data)
    if not flashvars.has_key(u"url_encoded_fmt_stream_map"):
        return links

    if flashvars.has_key(u"ttsurl"):
        logger.info("ttsurl=" + flashvars[u"ttsurl"])

    # Holds the extracted JS signature function once it has been built; the
    # empty string means "not built yet", so it is created at most once and
    # reused for later entries.
    js_signature = ""
    for url_desc in flashvars[u"url_encoded_fmt_stream_map"].split(u","):
        # parse_qs maps each key to a list of values, hence the [0] lookups.
        url_desc_map = cgi.parse_qs(url_desc)
        logger.info(u"url_map: " + repr(url_desc_map))
        # Entries with neither a direct URL nor a stream name are unusable.
        if not (url_desc_map.has_key(u"url")
                or url_desc_map.has_key(u"stream")):
            continue

        try:
            key = int(url_desc_map[u"itag"][0])
            url = u""
            if url_desc_map.has_key(u"url"):
                url = urllib.unquote(url_desc_map[u"url"][0])
            elif url_desc_map.has_key(u"conn") and url_desc_map.has_key(
                    u"stream"):
                # Join connection and stream, ensuring exactly one "/"
                # separates them when conn does not already end with one.
                url = urllib.unquote(url_desc_map[u"conn"][0])
                if url.rfind("/") < len(url) - 1:
                    url = url + "/"
                url = url + urllib.unquote(url_desc_map[u"stream"][0])
            elif url_desc_map.has_key(
                    u"stream") and not url_desc_map.has_key(u"conn"):
                url = urllib.unquote(url_desc_map[u"stream"][0])

            if url_desc_map.has_key(u"sig"):
                # "sig" is appended unchanged.
                url = url + u"&signature=" + url_desc_map[u"sig"][0]
            elif url_desc_map.has_key(u"s"):
                # "s" must be transformed by the player's JS function first.
                sig = url_desc_map[u"s"][0]
                if not js_signature:
                    # Locate the player script URL in the page data and
                    # strip the backslashes from it.
                    urljs = scrapertools.find_single_match(
                        data, '"assets":.*?"js":\s*"([^"]+)"')
                    urljs = urljs.replace("\\", "")
                    if urljs:
                        data_js = scrapertools.downloadpage("http:" + urljs)
                        from jsinterp import JSInterpreter
                        # NOTE(review): the class "A-z" also matches the
                        # ASCII punctuation between "Z" and "a"; other
                        # snippets use "a-zA-Z" - confirm this is intended.
                        funcname = scrapertools.find_single_match(
                            data_js, '\.sig\|\|([A-z0-9$]+)\(')

                        jsi = JSInterpreter(data_js)
                        js_signature = jsi.extract_function(funcname)

                # If no script URL was found, js_signature is still "" and
                # this call raises; the except below then skips the entry.
                signature = js_signature([sig])
                url += u"&signature=" + signature

            # Commas are encoded so that the built-in method does not fail.
            url = url.replace(",", "%2C")
            # An itag missing from fmt_value raises KeyError here, which the
            # except below also swallows.
            video_urls.append(["(" + fmt_value[key] + ") [youtube]", url])
        except:
            # Best effort: log the traceback and move on to the next entry.
            import traceback
            logger.info(traceback.format_exc())

    return video_urls