def handleShowDJ(self, url):
    """Parse a show page and queue the episode links it lists.

    The page is fetched through ``self.getSJSrc(url)``. Every ``<p>`` inside
    the ``page_post`` div is inspected:

    * a paragraph containing ``<strong>Sprache`` or ``<strong>Format`` opens
      a new group and records those two options;
    * a paragraph containing ``<strong>Download:`` adds an episode (named by
      its first ``<strong>`` element) with its links grouped per hoster.
      Each link gets ``#hasName`` appended.

    How the links are queued depends on the ``changeNameDJ`` config value
    ("Episode", "Format", "Packagename" or "Show") and on whether ``url``
    already contains ``#hasName``.

    NOTE(review): the source arrived collapsed onto one line; block nesting
    was reconstructed from the control flow - confirm against upstream.
    """
    src = self.getSJSrc(url)
    soup = BeautifulSoup(src)
    post = soup.find("div", attrs={"id": "page_post"})
    ps = post.findAll("p")

    found = unescape(soup.find("h2").find("a").string.split(' –')[0])
    # Previously seasonName stayed unbound when the title was empty, which
    # raised a NameError further down. Fall back to the current package name.
    seasonName = found if found else self.pyfile.package().name

    groups = {}
    gid = -1
    for p in ps:
        if re.search("<strong>Sprache|<strong>Format", str(p)):
            opts = {"Sprache": "", "Format": ""}
            for v in p.findAll("strong"):
                n = unescape(v.string).strip()
                # Drop one optional leading/trailing colon from the label.
                n = re.sub(r"^([:]?)(.*?)([:]?)$", r'\2', n)
                if n.strip() not in opts:
                    continue
                val = v.nextSibling
                if not val:
                    continue
                val = val.replace("|", "").strip()
                val = re.sub(r"^([:]?)(.*?)([:]?)$", r'\2', val)
                opts[n.strip()] = val.strip()
            gid += 1
            groups[gid] = {"ep": {}, "opts": opts}
        elif re.search("<strong>Download:", str(p)):
            parts = str(p).split("<br />")
            if re.search("<strong>", parts[0]):
                ename = re.search('<strong>(.*?)</strong>', parts[0]).group(1).strip().decode("utf-8").replace("–", "-")
                groups[gid]["ep"][ename] = {}
                # The first part only carries the episode name.
                parts.remove(parts[0])
                for part in parts:
                    hostername = re.search(r" \| ([-a-zA-Z0-9]+\.\w+)", part)
                    if hostername:
                        hostername = hostername.group(1)
                        groups[gid]["ep"][ename][hostername] = [
                            link + "#hasName"
                            for link in re.findall('href="(.*?)"', part)
                        ]

    # The naming mode does not change while packages are assembled.
    naming = self.getConfig("changeNameDJ")

    links = []
    for g in groups.values():
        for ename in g["ep"]:
            links.extend(self.getpreferred(g["ep"][ename]))
            if naming == "Episode":
                # One package per episode; start collecting afresh.
                self.packages.append((ename, links, ename))
                links = []
        package = "%s (%s, %s)" % (seasonName, g["opts"]["Format"], g["opts"]["Sprache"])
        if naming == "Format":
            # One package per format/language group.
            self.packages.append((package, links, package))
            links = []

    if (naming == "Packagename") or re.search("#hasName", url):
        self.core.files.addLinks(links, self.pyfile.package().id)
    elif (naming == "Show") or not re.search("#hasName", url):
        self.packages.append((seasonName, links, seasonName))
def handleWebLinks(self):
    """Decrypt the linksave.in "Web" links found in ``self.html``.

    For every 43-character link id on the page, the ``fw-<id>`` page is
    loaded, its last inline script is evaluated with ``self.js`` to obtain
    the ``dl-`` redirection URL, and the ``<iframe src>`` of that page is
    taken as the final link.

    Returns:
        list: the decrypted links. Empty when no JS engine is available
        (``self.js`` is falsy) or when nothing could be decrypted.

    A failure on one link is logged and does not stop the remaining links.
    """
    package_links = []
    self.logDebug("Search for Web links")
    if not self.js:
        self.logDebug("no JS -> skip Web links")
        return package_links

    # TODO: Gather paginated web links
    pattern = r'<a href="http://linksave\.in/(\w{43})"'
    ids = re.findall(pattern, self.html)
    self.logDebug("Decrypting %d Web links" % len(ids))
    for i, weblink_id in enumerate(ids):
        # Bound before the try block so the error handler can always log it.
        webLink = "http://linksave.in/%s" % weblink_id
        try:
            self.logDebug("Decrypting Web link %d, %s" % (i+1, webLink))
            fwLink = "http://linksave.in/fw-%s" % weblink_id
            response = self.load(fwLink)
            jscode = re.findall(r'<script type="text/javascript">(.*)</script>', response)[-1]
            # Stub out document.write so the script returns what it would have written.
            jseval = self.js.eval("document = { write: function(e) { return e; } }; %s" % jscode)
            dlLink = re.search(r'http://linksave\.in/dl-\w+', jseval).group(0)
            self.logDebug("JsEngine returns value [%s] for redirection link" % dlLink)
            response = self.load(dlLink)
            link = unescape(re.search(r'<iframe src="(.+?)"', response).group(1))
            package_links.append(link)
        except Exception as detail:
            # Best effort: keep going with the remaining links.
            self.logDebug("Error decrypting Web link %s, %s" % (webLink, detail))

    # The collected links were previously dropped (the function returned None).
    return package_links
def get_file_name(self):
    """Build the ``.flv`` file name from the page's ``flashvars.title``.

    Downloads the page first when ``self.html`` is still ``None``. The title
    is reduced to ASCII, unescaped, and every ``+`` becomes a space.

    NOTE(review): the encode/decode chain relies on Python 2 byte-string
    semantics - confirm before porting.
    """
    if self.html is None:
        self.download_html()

    title = re.search("flashvars.title = \"(.*?)\";", self.html).group(1)
    ascii_title = title.encode("ascii", "ignore")
    cleaned = unescape(ascii_title).decode("utf-8").encode("ascii", "ignore").replace("+", " ")
    return "%s.flv" % cleaned
def decrypt(self, pyfile):
    """Resolve an rs-layer.com folder into a package of direct links.

    The page at ``pyfile.url`` is loaded. If it shows a captcha image, the
    captcha is solved with ``self.decryptCaptcha`` and the page is requested
    again with the answer posted. Each link id found in the result is then
    opened and its ``<iframe>`` source collected. The links are appended to
    ``self.packages`` under the current package's name and folder.

    NOTE(review): the source arrived collapsed onto one line; the link-id
    scan is assumed to run whether or not a captcha was present - confirm.
    """
    url = pyfile.url
    src = self.req.load(str(url))

    soup = BeautifulSoup(src)
    captchaTag = soup.find("img", attrs={"id": "captcha_image"})
    if captchaTag:
        captchaUrl = "http://rs-layer.com/" + captchaTag["src"]
        self.logDebug("Captcha URL: %s" % captchaUrl)
        result = self.decryptCaptcha(str(captchaUrl), imgtype="png")
        self.req.lastUrl = url
        src = self.req.load(str(url), post={'captcha_input': result, 'image_name': captchaTag["src"]})

    link_ids = re.findall(r"onclick=\"getFile\(\'([0-9]{7}-.{8})\'\);changeBackgroundColor", src)

    if not link_ids:
        # No links in the response; presumably a wrong captcha - TODO confirm
        # that retry() aborts this attempt.
        self.retry()

    self.correctCaptcha()

    links = []
    for link_id in link_ids:
        self.logDebug("ID: %s" % link_id)
        frame_page = self.req.load("http://rs-layer.com/link-" + link_id + ".html")
        new_link = unescape(re.search(r"<iframe style=\"width: 100%; height: 100%;\" src=\"(.*)\"></frame>", frame_page).group(1))
        self.logDebug("Link: %s" % new_link)
        links.append(new_link)

    self.packages.append((self.pyfile.package().name, links, self.pyfile.package().folder))
def handleWebLinks(self):
    """Collect the decrypted targets of all linksave.in "Web" links.

    Requires a JavaScript engine in ``self.js``; without one the step is
    skipped. For each link id matched in ``self.html`` the ``fw-`` page is
    fetched, its last inline script is evaluated to find the ``dl-`` URL,
    and the iframe source on that page becomes the resulting link.

    Returns:
        list: decrypted links (possibly empty). Errors on individual links
        are logged via ``self.logDebug`` and skipped.
    """
    package_links = []
    self.logDebug("Search for Web links")

    if self.js:
        # TODO: Gather paginated web links
        pattern = r'<a href="http://linksave\.in/(\w{43})"'
        ids = re.findall(pattern, self.html)
        self.logDebug("Decrypting %d Web links" % len(ids))

        # Start counting at 1 for the log output. The loop variable no longer
        # shadows the builtin id().
        for number, weblink_id in enumerate(ids, 1):
            webLink = "http://linksave.in/%s" % weblink_id
            try:
                self.logDebug("Decrypting Web link %d, %s" % (number, webLink))
                fwLink = "http://linksave.in/fw-%s" % weblink_id
                response = self.load(fwLink)
                jscode = re.findall(r'<script type="text/javascript">(.*)</script>', response)[-1]
                # document.write is replaced so the evaluated script returns its output.
                jseval = self.js.eval("document = { write: function(e) { return e; } }; %s" % jscode)
                dlLink = re.search(r'http://linksave\.in/dl-\w+', jseval).group(0)
                self.logDebug("JsEngine returns value [%s] for redirection link" % dlLink)
                response = self.load(dlLink)
                link = unescape(re.search(r'<iframe src="(.+?)"', response).group(1))
                package_links.append(link)
            except Exception as detail:
                # Deliberately best effort: one broken link must not stop the rest.
                self.logDebug("Error decrypting Web link %s, %s" % (webLink, detail))
    else:
        self.logDebug("no JS -> skip Web links")

    # Hand the result back; it used to be discarded.
    return package_links
def proceed(self, url, location):
    """Append the target of every 1kh.de download entry to ``self.urls``.

    The ``url`` argument is overwritten with ``self.parent.url`` and
    ``location`` is not used. The loaded page is stored in ``self.html``.
    """
    url = self.parent.url
    self.html = self.load(url)

    id_pattern = r"<a id=\"DownloadLink_(\d*)\" href=\"http://1kh.de/"
    frame_pattern = "width=\"100%\" src=\"(.*)\"></iframe>"

    for link_id in re.findall(id_pattern, self.html):
        # Each entry has its own page whose iframe points at the real link.
        frame_page = self.load("http://1kh.de/l/" + link_id)
        target = re.search(frame_pattern, frame_page).group(1)
        self.urls.append(unescape(target))
def get_file_url(self):
    """Return the unescaped URL found after ``hashlink=`` in the page.

    The page is downloaded first when ``self.html`` is still ``None``.
    """
    if self.html is None:
        self.download_html()

    match = re.search(r'hashlink=(http.*?)"', self.html)
    return unescape(match.group(1))
def get_file_name(self):
    """Return ``<title>.flv`` using the ``flashvars.title`` value of the page.

    Fetches the page via ``self.download_html()`` if ``self.html`` is
    ``None``. Non-ASCII characters are dropped and ``+`` is replaced by a
    space.

    NOTE(review): the byte/text conversions assume Python 2 strings - confirm.
    """
    if self.html is None:
        self.download_html()

    raw_title = re.search("flashvars.title = \"(.*?)\";", self.html).group(1)
    unescaped = unescape(raw_title.encode("ascii", "ignore"))
    base_name = unescaped.decode("utf-8").encode("ascii", "ignore").replace("+", " ")
    name = "%s.flv" % base_name
    return name
def get_file_url(self):
    """Extract the download URL that follows ``hashlink=`` in ``self.html``.

    Loads the page with ``self.download_html()`` when it has not been
    fetched yet, then returns the unescaped match.
    """
    if self.html is None:
        self.download_html()

    hashlink_pattern = r'hashlink=(http.*?)"'
    escaped_url = re.search(hashlink_pattern, self.html).group(1)
    file_url = unescape(escaped_url)
    return file_url
def handleContainer(self, type_):
    """Return the linksave.in URLs of all containers of the given type.

    ``type_`` is lower-cased and must be alphanumeric, because it is
    inserted into a regular expression; otherwise ``self.fail`` is called.

    Returns:
        list: one ``http://linksave.in/...`` URL per container link found
        in ``self.html``.
    """
    type_ = type_.lower()
    self.logDebug('Seach for %s Container links' % type_.upper())

    # Guard against a broken re-pattern (cnl2, rsdf, ccf, dlc, web are all alpha-numeric).
    if not type_.isalnum():
        self.fail('unknown container type "%s" (this is probably a bug)' % type_)

    pattern = r"\('%s_link'\).href=unescape\('(.*?\.%s)'\)" % (type_, type_)
    containersLinks = re.findall(pattern, self.html)
    self.logDebug("Found %d %s Container links" % (len(containersLinks), type_.upper()))

    return ["http://linksave.in/%s" % unescape(containerLink)
            for containerLink in containersLinks]
def get_file_url(self):
    """Return the absolute download URL of the video.

    When the page sets ``flashvars.hd = "1"``, the ``hd_url`` attribute from
    the videolink XML endpoint is returned. Otherwise the URL is assembled
    from the ``s``, ``un``, ``k1`` and ``k2`` flashvars, with ``un`` passed
    through ``self.__decrypt``.
    """
    if self.html is None:
        self.download_html()

    page = self.html
    video_id = re.search("previewplayer/\\?v=(.*?)&width", page).group(1)

    # HD variant: the link comes straight from the XML endpoint.
    if "flashvars.hd = \"1\";" in page:
        content = self.req.load("http://www.megavideo.com/xml/videolink.php?v=%s" % video_id)
        hd_match = re.search("hd_url=\"(.*?)\"", content)
        return unescape(hd_match.group(1))

    # Normal variant: server number plus decrypted file key.
    s = re.search("flashvars.s = \"(\\d+)\";", page).group(1)
    un = re.search("flashvars.un = \"(.*?)\";", page).group(1)
    k1 = re.search("flashvars.k1 = \"(\\d+)\";", page).group(1)
    k2 = re.search("flashvars.k2 = \"(\\d+)\";", page).group(1)
    file_key = self.__decrypt(un, int(k1), int(k2))
    return "http://www%s.megavideo.com/files/%s/" % (s, file_key)
def handleShow(self, url):
    """Queue the links from a show page's ``scb`` navigation block.

    With ``changeNameSJ`` set to "Show", the links go into a new package
    named after the page title (or the current package name when the title
    is empty). For any other setting, each link gets ``#hasName`` appended
    and is added to the current package.
    """
    soup = BeautifulSoup(self.getSJSrc(url))
    packageName = self.pyfile.package().name

    if self.getConfig("changeNameSJ") == "Show":
        title = unescape(soup.find("h2").find("a").string.split(' –')[0])
        if title:
            packageName = title

    nav = soup.find("div", attrs={"id": "scb"})
    package_links = []
    for anchor in nav.findAll("a"):
        if self.getConfig("changeNameSJ") == "Show":
            suffix = ""
        else:
            # Mark the link so the follow-up step keeps the package name.
            suffix = "#hasName"
        package_links.append(anchor["href"] + suffix)

    if self.getConfig("changeNameSJ") != "Show":
        self.core.files.addLinks(package_links, self.pyfile.package().id)
        return
    self.packages.append((packageName, package_links, packageName))
def decrypt(self, pyfile):
    """Turn an rs-layer.com page into a package of resolved links.

    Steps:
      1. Load ``pyfile.url``.
      2. If a ``captcha_image`` is present, solve it and reload the page
         with the answer posted.
      3. Extract the link ids from the ``getFile(...)`` onclick handlers.
      4. Open each ``link-<id>.html`` page and take its iframe source.

    The result is appended to ``self.packages`` using the name and folder
    of the current package.

    NOTE(review): the source arrived collapsed onto one line; step 3 is
    assumed to sit outside the captcha branch - confirm against upstream.
    """
    url = pyfile.url
    src = self.req.load(str(url))

    soup = BeautifulSoup(src)
    captchaTag = soup.find("img", attrs={"id": "captcha_image"})
    if captchaTag:
        captchaUrl = "http://rs-layer.com/" + captchaTag["src"]
        self.logDebug("Captcha URL: %s" % captchaUrl)
        result = self.decryptCaptcha(str(captchaUrl), imgtype="png")
        # An unused lookup of the "captcha" input element was removed here.
        self.req.lastUrl = url
        src = self.req.load(str(url), post={
            'captcha_input': result,
            'image_name': captchaTag["src"]
        })

    link_ids = re.findall(
        r"onclick=\"getFile\(\'([0-9]{7}-.{8})\'\);changeBackgroundColor", src)

    if not link_ids:
        # Nothing to resolve; presumably retry() aborts here - TODO confirm.
        self.retry()

    self.correctCaptcha()

    links = []
    for link_id in link_ids:
        self.logDebug("ID: %s" % link_id)
        new_link = unescape(
            re.search(
                r"<iframe style=\"width: 100%; height: 100%;\" src=\"(.*)\"></frame>",
                self.req.load("http://rs-layer.com/link-" + link_id + ".html")).group(1))
        self.logDebug("Link: %s" % new_link)
        links.append(new_link)

    self.packages.append(
        (self.pyfile.package().name, links, self.pyfile.package().folder))
def get_file_url(self):
    """Work out the absolute URL the video can be downloaded from.

    The video id is read from the ``previewplayer`` link. If the page
    advertises HD (``flashvars.hd = "1"``), the ``hd_url`` reported by
    ``videolink.php`` is returned. Otherwise the URL is built from the
    ``s`` flashvar and the result of ``self.__decrypt(un, k1, k2)``.
    """
    if self.html is None:
        self.download_html()

    # get id
    video_id = re.search("previewplayer/\\?v=(.*?)&width", self.html).group(1)

    # check for hd link and return if there
    has_hd = "flashvars.hd = \"1\";" in self.html
    if has_hd:
        xml_url = "http://www.megavideo.com/xml/videolink.php?v=%s" % video_id
        content = self.req.load(xml_url)
        return unescape(re.search("hd_url=\"(.*?)\"", content).group(1))

    # else get normal link
    s = re.search("flashvars.s = \"(\\d+)\";", self.html).group(1)
    un = re.search("flashvars.un = \"(.*?)\";", self.html).group(1)
    k1 = int(re.search("flashvars.k1 = \"(\\d+)\";", self.html).group(1))
    k2 = int(re.search("flashvars.k2 = \"(\\d+)\";", self.html).group(1))
    return "http://www%s.megavideo.com/files/%s/" % (
        s, self.__decrypt(un, k1, k2))
def get_file_name(self):
    """Return the page's ``globalHd`` heading as an ``.flv`` file name.

    Slashes are removed from the heading before ``.flv`` is appended, and
    the result is unescaped.
    """
    file_name_pattern = r"<h1 class='globalHd'>(.*)</h1>"
    heading = re.search(file_name_pattern, self.html).group(1)
    return unescape(heading.replace("/", "") + '.flv')
def get_file_name(self):
    """Derive the file name from the ``<h1 class='globalHd'>`` heading.

    The heading text has every ``/`` stripped, gets the ``.flv`` extension
    and is passed through ``unescape``.
    """
    file_name_pattern = r"<h1 class='globalHd'>(.*)</h1>"
    match = re.search(file_name_pattern, self.html)
    without_slashes = match.group(1).replace("/", "")
    file_name = without_slashes + '.flv'
    return unescape(file_name)