def handleWebLinks(self):
    """Decrypt the "Web link" entries found in ``self.html``.

    Every ``out.html`` form on the page carries a ``file`` id.  Each id is
    posted back to ``http://linkcrypt.ws/out.html`` and the target link is
    cut out of the ``window.location = "..."`` assignment in the response.

    Returns:
        list: the HTML-unescaped links that could be decrypted.  Ids whose
        request or parsing failed are logged and skipped.
    """
    self.logDebug("Search for Web links ")

    package_links = []
    pattern = r'<form action="http://linkcrypt.ws/out.html"[^>]*?>.*?<input[^>]*?value="(.+?)"[^>]*?name="file"'
    ids = re.findall(pattern, self.html, re.I | re.S)

    self.logDebug("Decrypting %d Web links" % len(ids))

    for idx, weblink_id in enumerate(ids):
        try:
            self.logDebug("Decrypting Web link %d, %s" % (idx + 1, weblink_id))

            res = self.load("http://linkcrypt.ws/out.html", post={'file': weblink_id})

            marker_pos = res.find("window.location =")
            if marker_pos == -1:
                # str.find() signals "not found" with -1; adding the offset
                # to it would silently slice an unrelated part of the page.
                raise ValueError("redirect target not found")

            # 19 = the 17 characters of the marker plus two more
            # (presumably a space and the opening quote of the link).
            indexs = marker_pos + 19
            indexe = res.find('"', indexs)
            if indexe == -1:
                raise ValueError("redirect target is not terminated")

            link2 = res[indexs:indexe]
            self.logDebug(link2)

            package_links.append(html_unescape(link2))

        except Exception as detail:
            # Best effort: one broken entry must not abort the others.
            self.logDebug("Error decrypting Web link %s, %s" % (weblink_id, detail))

    return package_links
def handleFree(self, pyfile):
    """Follow the two intermediate hops and store the final link.

    Hop #1 is searched in the unicode-escape decoded ``self.html``; the page
    it leads to replaces ``self.html``.  Hop #2 is searched in that page and
    requested (headers only) on ``https://docs.google.com``; the
    ``location`` header of that answer is stored in ``self.link``.
    A missing hop is reported through ``self.error``.
    """
    first_hop_pattern = r'"(https://docs.google.com/uc\?id.*?export=download)",'
    try:
        decoded_page = self.html.decode('unicode-escape')
        first_hop = re.search(first_hop_pattern, decoded_page).group(1)

    except AttributeError:
        self.error(_("Hop #1 not found"))

    else:
        self.logDebug("Next hop: %s" % first_hop)
        first_hop_page = self.load(first_hop)
        self.html = first_hop_page.decode('unicode-escape')

    second_hop_pattern = r'href="(/uc\?export=download.*?)">'
    try:
        second_hop = html_unescape(re.search(second_hop_pattern, self.html).group(1))

    except AttributeError:
        self.error(_("Hop #2 not found"))

    else:
        self.logDebug("Next hop: %s" % second_hop)
        header = self.load("https://docs.google.com" + second_hop, just_header=True)
        download_link = header['location']
        self.logDebug("DL-Link: %s" % download_link)
        self.link = download_link
def proceed(self, url, location):
    """Collect the links embedded behind every download entry of the page.

    The page is always loaded from ``self.parent.url``; the *url* argument
    is not used and neither is *location*.  For each ``DownloadLink_<id>``
    anchor, ``http://1kh.de/l/<id>`` is loaded and the ``src`` of its
    iframe is unescaped and appended to ``self.urls``.
    """
    self.html = self.load(self.parent.url)

    anchor_pattern = r"<a id=\"DownloadLink_(\d*)\" href=\"http://1kh.de/"
    iframe_pattern = "width=\"100%\" src=\"(.*)\"></iframe>"

    for link_id in re.findall(anchor_pattern, self.html):
        frame_page = self.load("http://1kh.de/l/" + link_id)
        frame_src = re.search(iframe_pattern, frame_page).group(1)
        self.urls.append(html_unescape(frame_src))
def handleFree(self, pyfile):
    """Resolve the download link through two redirect-like hops.

    1. Find the ``https://docs.google.com/uc?id...export=download`` link in
       the unicode-escape decoded ``self.html`` and load it into
       ``self.html``.
    2. Find the relative ``/uc?export=download...`` link in that page,
       request its headers from ``https://docs.google.com`` and keep the
       ``location`` header as ``self.link``.

    ``self.error`` is called when a hop cannot be found.
    """
    try:
        hop_one = re.search(
            r'"(https://docs.google.com/uc\?id.*?export=download)",',
            self.html.decode('unicode-escape'),
        ).group(1)
    except AttributeError:
        self.error(_("Hop #1 not found"))
    else:
        self.logDebug("Next hop: %s" % hop_one)
        self.html = self.load(hop_one).decode('unicode-escape')

    try:
        hop_two_match = re.search(r'href="(/uc\?export=download.*?)">', self.html)
        hop_two = html_unescape(hop_two_match.group(1))
    except AttributeError:
        self.error(_("Hop #2 not found"))
    else:
        self.logDebug("Next hop: %s" % hop_two)
        answer = self.load("https://docs.google.com" + hop_two, just_header=True)
        target = answer['location']
        self.logDebug("DL-Link: %s" % target)
        self.link = target
def handleWebLinks(self):
    """Decrypt all "Web link" forms of the page held in ``self.html``.

    The ``file`` id of every ``out.html`` form is posted back to
    ``http://linkcrypt.ws/out.html``; the link is then read from the
    ``window.location = "..."`` statement of the answer.

    Returns:
        list: HTML-unescaped links, one per id that was decrypted
        successfully.  Failures are logged at debug level and skipped.
    """
    self.logDebug("Search for Web links ")

    package_links = []
    pattern = r'<form action="http://linkcrypt.ws/out.html"[^>]*?>.*?<input[^>]*?value="(.+?)"[^>]*?name="file"'
    ids = re.findall(pattern, self.html, re.I | re.S)

    self.logDebug("Decrypting %d Web links" % len(ids))

    for idx, weblink_id in enumerate(ids):
        try:
            self.logDebug("Decrypting Web link %d, %s" % (idx + 1, weblink_id))

            res = self.load("http://linkcrypt.ws/out.html", post={'file': weblink_id})

            location_pos = res.find("window.location =")
            if location_pos == -1:
                # Without this check, -1 + 19 would point at index 18 and an
                # arbitrary piece of the page would be stored as a link.
                raise ValueError("redirect target not found")

            # Skip the 17-character marker and two further characters
            # (presumably a space and the opening double quote).
            indexs = location_pos + 19
            indexe = res.find('"', indexs)
            if indexe == -1:
                raise ValueError("redirect target is not terminated")

            link2 = res[indexs:indexe]
            self.logDebug(link2)

            package_links.append(html_unescape(link2))

        except Exception as detail:
            # Deliberately broad: a single failing id is only logged.
            self.logDebug("Error decrypting Web link %s, %s" % (weblink_id, detail))

    return package_links
def get_file_url(self):
    """Return the absolute downloadable file URL.

    The page is fetched through ``self.download_html()`` when ``self.html``
    is still empty; the URL is the ``hashlink=`` value found in it,
    HTML-unescaped.
    """
    if not self.html:
        self.download_html()

    hashlink = re.search(r'hashlink=(http.*?)"', self.html)
    return html_unescape(hashlink.group(1))
def getPackageName(self):
    """Return the package title found in ``self.html``, or ``None``.

    ``None`` is returned when the plugin defines no ``TITLE_PATTERN`` or
    when the pattern does not match.  Otherwise the ``title`` group is
    stripped, HTML-unescaped, logged and returned.
    """
    if not hasattr(self, 'TITLE_PATTERN'):
        return None

    title_match = re.search(self.TITLE_PATTERN, self.html)
    if not title_match:
        return None

    name = html_unescape(title_match.group('title').strip())
    self.logDebug("Found name [%s] in package info" % (name))
    return name
def proceed(self, url, location):
    """Resolve every ``DownloadLink_<id>`` entry of the parent page.

    Note that *url* is overridden by ``self.parent.url`` before use and
    *location* is not read.  Each id is looked up at
    ``http://1kh.de/l/<id>``; the iframe ``src`` found there is unescaped
    and added to ``self.urls``.
    """
    source_url = self.parent.url
    self.html = self.load(source_url)

    found_ids = re.findall(
        r"<a id=\"DownloadLink_(\d*)\" href=\"http://1kh.de/", self.html)

    for entry_id in found_ids:
        entry_page = self.load("http://1kh.de/l/" + entry_id)
        iframe = re.search("width=\"100%\" src=\"(.*)\"></iframe>", entry_page)
        resolved = html_unescape(iframe.group(1))
        self.urls.append(resolved)
def handleCaptcha(self, inputs):
    """Solve whichever captcha the page shows and store the answer in *inputs*.

    The captcha kinds are probed in a fixed order and the first hit wins:

    1. image captcha (``CAPTCHA_PATTERN``)            -> ``inputs['code']``
    2. positioned-digit block (``CAPTCHA_BLOCK_PATTERN``) -> ``inputs['code']``
    3. ReCaptcha   -> ``recaptcha_response_field`` / ``recaptcha_challenge_field``
    4. SolveMedia  -> ``adcopy_response`` / ``adcopy_challenge``

    Returns:
        int: the number of the captcha kind handled (1-4), or 0 if none
        was found.
    """
    image_match = re.search(self.CAPTCHA_PATTERN, self.html)
    if image_match:
        inputs['code'] = self.decryptCaptcha(image_match.group(1))
        return 1

    block_match = re.search(self.CAPTCHA_BLOCK_PATTERN, self.html, re.S)
    if block_match:
        captcha_div = block_match.group(1)
        numerals = re.findall(r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>',
                              html_unescape(captcha_div))
        self.logDebug(captcha_div)

        # Each entry is (padding-left, digit); the code is the digits
        # read in order of increasing padding.
        by_padding = sorted(numerals, key=lambda num: int(num[0]))
        inputs['code'] = "".join(digit for padding, digit in by_padding)

        self.logDebug("Captcha code: %s" % inputs['code'], numerals)
        return 2

    recaptcha = ReCaptcha(self)
    try:
        captcha_key = re.search(self.RECAPTCHA_PATTERN, self.html).group(1)
    except Exception:
        # No usable pattern match: let the helper look for the key itself.
        captcha_key = recaptcha.detect_key()
    else:
        self.logDebug("ReCaptcha key: %s" % captcha_key)

    if captcha_key:
        response, challenge = recaptcha.challenge(captcha_key)
        inputs['recaptcha_response_field'] = response
        inputs['recaptcha_challenge_field'] = challenge
        return 3

    solvemedia = SolveMedia(self)
    try:
        captcha_key = re.search(self.SOLVEMEDIA_PATTERN, self.html).group(1)
    except Exception:
        captcha_key = solvemedia.detect_key()
    else:
        self.logDebug("SolveMedia key: %s" % captcha_key)

    if captcha_key:
        response, challenge = solvemedia.challenge(captcha_key)
        inputs['adcopy_response'] = response
        inputs['adcopy_challenge'] = challenge
        return 4

    return 0
def downloadFile(self, pyfile):
    """Follow up to five redirects by hand, name the file and download it.

    The file name is taken from the last path segment of the final URL and
    overridden by a ``filename`` found in the ``content-disposition``
    header, if any; the URL itself is used when both are empty.

    Raises:
        ResponseException: when a response reports status code 404.
    """
    url = pyfile.url

    for attempt in range(5):
        header = self.load(url, just_header=True)

        # self.load does not raise a BadHeader on 404 responses, do it here
        if header.get('code') == 404:
            raise ResponseException(404)

        if 'location' not in header:
            break

        location = header['location']
        self.logDebug("Location: " + location)

        # scheme + host of the current url, used for relative redirects
        base = search(r'https?://[^/]+', url).group(0)

        if location.startswith("http"):
            url = unquote(location)
        elif location.startswith("/"):
            url = base + unquote(location)
        else:
            url = '%s/%s' % (base, unquote(location))

    last_segment = urlparse(url).path.split("/")[-1]
    name = html_unescape(unquote(last_segment))

    if 'content-disposition' in header:
        disposition = header['content-disposition']
        self.logDebug("Content-Disposition: " + disposition)

        m = search("filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)", disposition)
        if m:
            disp = m.groupdict()
            self.logDebug(disp)

            # plain "filename=" carries no charset: assume utf-8
            encoding = disp['enc'] or 'utf-8'
            raw_name = remove_chars(disp['name'], "\"';").strip()
            name = unicode(unquote(raw_name), encoding)

    if not name:
        name = url

    pyfile.name = name
    self.logDebug("Filename: %s" % pyfile.name)
    self.download(url, disposition=True)
def downloadFile(self, pyfile):
    """Resolve redirects manually (five at most), then download the file.

    ``pyfile.name`` is set from the final URL's last path segment, or from
    the ``filename`` parameter of a ``content-disposition`` header when one
    is present; if no name results, the URL is used as the name.

    Raises:
        ResponseException: when a response reports status code 404.
    """
    url = pyfile.url

    for _attempt in xrange(5):
        header = self.load(url, just_header=True)

        # self.load does not raise a BadHeader on 404 responses, do it here
        if header.get("code") == 404:
            raise ResponseException(404)

        if "location" not in header:
            break

        redirect = header["location"]
        self.logDebug("Location: " + redirect)

        # scheme + host at the start of the current url
        base = match(r"https?://[^/]+", url).group(0)
        target = unquote(redirect)

        if redirect.startswith("http"):
            url = target
        elif redirect.startswith("/"):
            url = base + target
        else:
            url = "%s/%s" % (base, target)

    name = html_unescape(unquote(urlparse(url).path.split("/")[-1]))

    if "content-disposition" in header:
        self.logDebug("Content-Disposition: " + header["content-disposition"])

        m = search("filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)", header["content-disposition"])
        if m:
            disp = m.groupdict()
            self.logDebug(disp)

            # no explicit charset in the header: fall back to utf-8
            charset = disp["enc"] if disp["enc"] else "utf-8"
            cleaned = remove_chars(disp["name"], "\"';").strip()
            name = unicode(unquote(cleaned), charset)

    pyfile.name = name if name else url
    self.logDebug("Filename: %s" % pyfile.name)
    self.download(url, disposition=True)
def handleCaptcha(self, inputs):
    """Detect the page's captcha, solve it and fill the answer into *inputs*.

    Probing order and return codes:

    * 1 - image captcha matched by ``CAPTCHA_PATTERN``
    * 2 - digit spans inside the block matched by ``CAPTCHA_BLOCK_PATTERN``
    * 3 - ReCaptcha
    * 4 - SolveMedia
    * 0 - no captcha detected (*inputs* is left untouched)
    """
    m = re.search(self.CAPTCHA_PATTERN, self.html)
    if m:
        captcha_url = m.group(1)
        inputs['code'] = self.decryptCaptcha(captcha_url)
        return 1

    m = re.search(self.CAPTCHA_BLOCK_PATTERN, self.html, re.S)
    if m:
        captcha_div = m.group(1)
        span_pattern = r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>'
        numerals = re.findall(span_pattern, html_unescape(captcha_div))
        self.logDebug(captcha_div)

        # Order the (padding-left, digit) pairs by padding, then keep
        # only the digits.
        numerals_in_order = sorted(numerals, key=lambda num: int(num[0]))
        code = "".join([pair[1] for pair in numerals_in_order])
        inputs['code'] = code

        self.logDebug("Captcha code: %s" % inputs['code'], numerals)
        return 2

    recaptcha = ReCaptcha(self)
    try:
        captcha_key = re.search(self.RECAPTCHA_PATTERN, self.html).group(1)
    except Exception:
        # Pattern unavailable or not matching: ask the helper instead.
        captcha_key = recaptcha.detect_key()
    else:
        self.logDebug("ReCaptcha key: %s" % captcha_key)

    if captcha_key:
        solved = recaptcha.challenge(captcha_key)
        inputs['recaptcha_response_field'], inputs['recaptcha_challenge_field'] = solved
        return 3

    solvemedia = SolveMedia(self)
    try:
        captcha_key = re.search(self.SOLVEMEDIA_PATTERN, self.html).group(1)
    except Exception:
        captcha_key = solvemedia.detect_key()
    else:
        self.logDebug("SolveMedia key: %s" % captcha_key)

    if captcha_key:
        solved = solvemedia.challenge(captcha_key)
        inputs['adcopy_response'], inputs['adcopy_challenge'] = solved
        return 4

    return 0
def get_file_name(self):
    """Return the file name built from the page's ``globalHd`` heading.

    Slashes are removed from the heading text, ``.flv`` is appended and the
    result is HTML-unescaped.
    """
    heading = re.search(r"<h1 class='globalHd'>(.*)</h1>", self.html).group(1)
    without_slashes = heading.replace("/", "")
    return html_unescape(without_slashes + '.flv')
def process(self, pyfile):
    """Download the video behind ``pyfile.url`` in the preferred format.

    Steps:

    1. Load the page; report it offline / temporarily offline if it says so.
    2. Work out the desired itag from the ``fmt`` / ``quality`` / ``3d``
       config values.
    3. Parse ``url_encoded_fmt_stream_map`` into ``(itag, url)`` pairs and
       keep the ones that are known in ``self.formats`` and enabled in the
       config; fail when none is left.
    4. Use the desired itag if available, otherwise the nearest one by
       quality index.
    5. Name the file after the page title and download it.
    6. If the url carries a ``t=..m..s`` start time and ffmpeg is
       installed, cut the download so that it starts at that time.

    Entries of ``self.formats`` are indexed as ``[0]`` file suffix (also
    used as config key), ``[3]`` quality index and ``[4]`` 3D flag;
    ``[1]``/``[2]`` are presumably width and height (only used for logging).
    """
    pyfile.url = replace_patterns(pyfile.url, self.URL_REPLACEMENTS)
    html = self.load(pyfile.url, decode=True)

    if re.search(r'<div id="player-unavailable" class="\s*player-width player-height\s*">', html):
        self.offline()

    if "We have been receiving a large volume of requests from your network." in html:
        self.tempOffline()

    # get config
    use3d = self.getConfig('3d')

    # quality name -> itag
    if use3d:
        quality = {"sd": 82, "hd": 84, "fullhd": 85, "240p": 83, "360p": 82,
                   "480p": 82, "720p": 84, "1080p": 85, "3072p": 85}
    else:
        quality = {"sd": 18, "hd": 22, "fullhd": 37, "240p": 5, "360p": 18,
                   "480p": 35, "720p": 22, "1080p": 37, "3072p": 38}

    desired_fmt = self.getConfig('fmt')

    if not desired_fmt:
        desired_fmt = quality.get(self.getConfig('quality'), 18)

    elif desired_fmt not in self.formats:
        self.logWarning(_("FMT %d unknown, using default") % desired_fmt)
        desired_fmt = 0
        if desired_fmt not in self.formats:
            # desired_fmt is used below as a key into self.formats; an
            # unregistered 0 would raise KeyError there, so use the
            # quality-based default instead.
            desired_fmt = quality.get(self.getConfig('quality'), 18)

    # parse available streams
    streams = re.search(r'"url_encoded_fmt_stream_map":"(.+?)",', html).group(1)
    # streams are comma separated, their key=value fields are separated
    # by the literal text \u0026 (Python 2 byte-string literal)
    streams = [x.split('\u0026') for x in streams.split(',')]
    streams = [dict((y.split('=', 1)) for y in x) for x in streams]
    streams = [(int(x['itag']), urllib.unquote(x['url'])) for x in streams]

    # self.logDebug("Found links: %s" % streams)

    self.logDebug("AVAILABLE STREAMS: %s" % [x[0] for x in streams])

    # build dictionary of supported itags (3D/2D)
    allowed = lambda x: self.getConfig(self.formats[x][0])
    streams = [x for x in streams if x[0] in self.formats and allowed(x[0])]

    if not streams:
        self.fail(_("No available stream meets your preferences"))

    # prefer streams matching the 3D setting, else take whatever is allowed
    fmt_dict = dict([x for x in streams if self.formats[x[0]][4] == use3d] or streams)

    self.logDebug("DESIRED STREAM: ITAG:%d (%s) %sfound, %sallowed" %
                  (desired_fmt, "%s %dx%d Q:%d 3D:%s" % self.formats[desired_fmt],
                   "" if desired_fmt in fmt_dict else "NOT ", "" if allowed(desired_fmt) else "NOT "))

    # return fmt nearest to quality index
    if desired_fmt in fmt_dict and allowed(desired_fmt):
        fmt = desired_fmt
    else:
        sel = lambda x: self.formats[x][3]  #: select quality index
        comp = lambda x, y: abs(sel(x) - sel(y))

        self.logDebug("Choosing nearest fmt: %s" % [(x, allowed(x), comp(x, desired_fmt)) for x in fmt_dict.keys()])

        # keep x only if it is at least as close as y AND of higher quality
        fmt = reduce(lambda x, y: x if comp(x, desired_fmt) <= comp(y, desired_fmt) and
                     sel(x) > sel(y) else y, fmt_dict.keys())

    self.logDebug("Chosen fmt: %s" % fmt)

    url = fmt_dict[fmt]

    self.logDebug("URL: %s" % url)

    # set file name
    file_suffix = self.formats[fmt][0] if fmt in self.formats else ".flv"
    file_name_pattern = '<meta name="title" content="(.+?)">'
    name = re.search(file_name_pattern, html).group(1).replace("/", "")

    # Cleaning invalid characters from the file name
    name = name.encode('ascii', 'replace')
    for c in self.invalidChars:
        name = name.replace(c, '_')

    pyfile.name = html_unescape(name)

    time = re.search(r"t=((\d+)m)?(\d+)s", pyfile.url)
    ffmpeg = which("ffmpeg")
    if ffmpeg and time:
        # groups are ("<min>m", "<min>", "<sec>"); minutes are optional
        m, s = time.groups()[1:]
        if m is None:
            m = "0"

        pyfile.name += " (starting at %s:%s)" % (m, s)

    pyfile.name += file_suffix
    filename = self.download(url)

    if ffmpeg and time:
        # move the full download aside and let ffmpeg write the cut
        # version (stream copy, no re-encoding) under the original name
        inputfile = filename + "_"
        os.rename(filename, inputfile)

        exit_code = subprocess.call([
            ffmpeg,
            "-ss", "00:%s:%s" % (m, s),
            "-i", inputfile,
            "-vcodec", "copy",
            "-acodec", "copy",
            filename])

        if exit_code == 0:
            os.remove(inputfile)
        else:
            # ffmpeg failed: do not throw away the only complete copy
            self.logWarning(_("ffmpeg failed with exit code %d, keeping the full video") % exit_code)
            if os.path.exists(filename):
                os.remove(filename)
            os.rename(inputfile, filename)
def process(self, pyfile):
    """Pick the best matching stream of the video page and download it.

    The desired itag comes from the ``fmt`` config value or, when that is
    unset, from the ``quality`` name (2D or 3D table depending on the
    ``3d`` config value).  Streams listed in the page's
    ``url_encoded_fmt_stream_map`` are filtered down to itags known in
    ``self.formats`` and enabled in the config; the desired itag is used if
    present, else the nearest by quality index.  The file is named after
    the page title.  When the url has a ``t=..m..s`` start time and ffmpeg
    is available, the download is cut to start at that time.

    Entries of ``self.formats`` are indexed as ``[0]`` file suffix (also
    used as config key), ``[3]`` quality index and ``[4]`` 3D flag;
    ``[1]``/``[2]`` are presumably width and height (only used for logging).
    """
    pyfile.url = replace_patterns(pyfile.url, self.URL_REPLACEMENTS)
    html = self.load(pyfile.url, decode=True)

    if re.search(
            r'<div id="player-unavailable" class="\s*player-width player-height\s*">',
            html):
        self.offline()

    if "We have been receiving a large volume of requests from your network." in html:
        self.tempOffline()

    # get config
    use3d = self.getConfig('3d')

    # quality name -> itag
    if use3d:
        quality = {
            "sd": 82,
            "hd": 84,
            "fullhd": 85,
            "240p": 83,
            "360p": 82,
            "480p": 82,
            "720p": 84,
            "1080p": 85,
            "3072p": 85,
        }
    else:
        quality = {
            "sd": 18,
            "hd": 22,
            "fullhd": 37,
            "240p": 5,
            "360p": 18,
            "480p": 35,
            "720p": 22,
            "1080p": 37,
            "3072p": 38,
        }

    desired_fmt = self.getConfig('fmt')

    if not desired_fmt:
        desired_fmt = quality.get(self.getConfig('quality'), 18)

    elif desired_fmt not in self.formats:
        self.logWarning(_("FMT %d unknown, using default") % desired_fmt)
        desired_fmt = 0
        if desired_fmt not in self.formats:
            # An unregistered 0 would raise KeyError as soon as it is
            # looked up in self.formats below; fall back to the
            # quality-based default.
            desired_fmt = quality.get(self.getConfig('quality'), 18)

    # parse available streams
    streams = re.search(r'"url_encoded_fmt_stream_map":"(.+?)",', html).group(1)
    # comma separated streams; fields inside a stream are separated by
    # the literal text \u0026 (Python 2 byte-string literal)
    streams = [x.split('\u0026') for x in streams.split(',')]
    streams = [dict((y.split('=', 1)) for y in x) for x in streams]
    streams = [(int(x['itag']), unquote(x['url'])) for x in streams]

    # self.logDebug("Found links: %s" % streams)

    self.logDebug("AVAILABLE STREAMS: %s" % [x[0] for x in streams])

    # build dictionary of supported itags (3D/2D)
    allowed = lambda x: self.getConfig(self.formats[x][0])
    streams = [
        x for x in streams if x[0] in self.formats and allowed(x[0])
    ]

    if not streams:
        self.fail(_("No available stream meets your preferences"))

    # streams matching the 3D setting win; otherwise use all allowed ones
    fmt_dict = dict([x for x in streams if self.formats[x[0]][4] == use3d]
                    or streams)

    self.logDebug(
        "DESIRED STREAM: ITAG:%d (%s) %sfound, %sallowed" %
        (desired_fmt, "%s %dx%d Q:%d 3D:%s" % self.formats[desired_fmt],
         "" if desired_fmt in fmt_dict else "NOT ",
         "" if allowed(desired_fmt) else "NOT "))

    # return fmt nearest to quality index
    if desired_fmt in fmt_dict and allowed(desired_fmt):
        fmt = desired_fmt
    else:
        sel = lambda x: self.formats[x][3]  #: select quality index
        comp = lambda x, y: abs(sel(x) - sel(y))

        self.logDebug("Choosing nearest fmt: %s" %
                      [(x, allowed(x), comp(x, desired_fmt))
                       for x in fmt_dict.keys()])

        # x survives only if it is at least as close as y AND better than y
        fmt = reduce(
            lambda x, y: x if comp(x, desired_fmt) <= comp(y, desired_fmt)
            and sel(x) > sel(y) else y, fmt_dict.keys())

    self.logDebug("Chosen fmt: %s" % fmt)

    url = fmt_dict[fmt]

    self.logDebug("URL: %s" % url)

    # set file name
    file_suffix = self.formats[fmt][0] if fmt in self.formats else ".flv"
    file_name_pattern = '<meta name="title" content="(.+?)">'
    name = re.search(file_name_pattern, html).group(1).replace("/", "")

    # Cleaning invalid characters from the file name
    name = name.encode('ascii', 'replace')
    for c in self.invalidChars:
        name = name.replace(c, '_')

    pyfile.name = html_unescape(name)

    time = re.search(r"t=((\d+)m)?(\d+)s", pyfile.url)
    ffmpeg = which("ffmpeg")
    if ffmpeg and time:
        # groups are ("<min>m", "<min>", "<sec>"); the minutes part is optional
        m, s = time.groups()[1:]
        if m is None:
            m = "0"

        pyfile.name += " (starting at %s:%s)" % (m, s)

    pyfile.name += file_suffix
    filename = self.download(url)

    if ffmpeg and time:
        # park the full download under a temporary name; ffmpeg writes the
        # trimmed copy (streams copied, not re-encoded) to the real name
        inputfile = filename + "_"
        os.rename(filename, inputfile)

        exit_code = subprocess.call([
            ffmpeg,
            "-ss", "00:%s:%s" % (m, s),
            "-i", inputfile,
            "-vcodec", "copy",
            "-acodec", "copy",
            filename,
        ])

        if exit_code == 0:
            os.remove(inputfile)
        else:
            # trimming failed: restore the complete download instead of
            # deleting it
            self.logWarning(_("ffmpeg failed with exit code %d, keeping the full video") % exit_code)
            if os.path.exists(filename):
                os.remove(filename)
            os.rename(inputfile, filename)