def download(self, **kwargs):
    """Resolve the real flv url for a hellsmedia video and start the download.

    Returns a LargeDownload on success, False when the page or the
    download id could not be fetched.
    """
    if not self.flvUrl:
        raise Exception("No flv url - can't start download")
    videoId = self.getId()
    page = UrlMgr(url=self.flvUrl, nocache=True)
    if not page.data:
        log.error('could not download page for %s', self.flvUrl)
        return False
    # the player iframe carries an intermediate id that the js api needs
    tmpId = textextract(page.data, '<iframe class="player" src="http://hellsmedia.com/player/'+videoId+"/", '/0"')
    jsPage = UrlMgr("http://hellsmedia.com/js/v.js/"+videoId+"/"+tmpId+"/0", nocache=True)
    downloadId = textextract(jsPage.data, "'playlistfile': 'http://hellsmedia.com/xml/v.xml/"+videoId+"/", "/0")
    if not downloadId:
        log.error('no flvUrl found for %s', self.flvUrl)
        return False
    flvUrl = "http://hellsmedia.com/flv/v.flv/"+videoId+"/"+downloadId+"/0?start=0"
    kwargs['url'] = flvUrl
    log.info('Extracted following url for download: %s', flvUrl)
    return LargeDownload(**kwargs)
def download(self, **kwargs):
    """Sit out streamcloud's countdown, replay its human-check form and
    download the file url extracted from the answer page.

    Returns a LargeDownload on success, False on any extraction failure
    or when the wait was interrupted.
    """
    if not self.flvUrl:
        raise Exception("No flv url - can't start download")
    videoId = self.getId()
    page = UrlMgr(url=self.flvUrl, nocache=True)
    if not page.data:
        log.error('could not download page for %s', self.flvUrl)
        return False
    log.info("Streamcloud wants us to wait 10 seconds - we wait 11 :)")
    if not self.sleep(11):
        return False
    # this is the "imhuman" form the browser would submit after the countdown
    formData = {
        'op': 'download1',
        'id': videoId,
        'fname': textextract(self.flvUrl, 'streamcloud.eu/'+videoId+'/', ''),
        'imhuman': 'Watch video now',
        'usr_login': '',
        'referer': '',
        'hash': ''
    }
    answer = UrlMgr(url=self.flvUrl, nocache=True, keepalive=False, post=formData)
    flvUrl = textextract(answer.data, 'file: "', '"')
    if not flvUrl:
        log.error('no flvUrl found for %s', self.flvUrl)
        return False
    kwargs['url'] = flvUrl
    log.info('Extracted following url for download: %s', flvUrl)
    return LargeDownload(**kwargs)
def getDetailContent(data, name):
    """Return the detail cell labelled *name* from an eliteanimes detail page.

    data -- html of the details page
    name -- label in the left column (e.g. 'Jahr', 'Genre')

    Returns the inner html of the matching content cell, or None when the
    row is missing or only holds the 'nothing entered yet' placeholder.
    """
    # BUGFIX: previously searched url.data, but `url` is not defined in this
    # scope (NameError) and the `data` parameter was silently ignored.
    content = textextract(data, '<td class="atitle2" valign="top">'+name+'</td>', '</tr>')
    if not content:
        return None
    content = textextract(content, '<td class="acontent2">', '</td>')
    # BUGFIX: was `find(...) > 0`, which missed the placeholder when it sat
    # at position 0; test for presence anywhere in the cell instead.
    if content.find('Noch nichts eingetragen') != -1:
        return None
    return content
def get(self):
    """Build the media tree (episodes, image, tags, year) for one
    eliteanimes title.

    Reads the stream page for the episode list and the details page for
    metadata; returns the result of afterExtract, or None when the title
    is unknown to getMedia.
    """
    link = self.link
    url = link.replace('details', 'stream')
    # checkPage may swap the url object (cookie / captcha handling);
    # NOTE(review): it is applied twice on purpose here, presumably so a page
    # repaired on the first pass is re-validated - confirm before changing.
    url = self.checkPage(UrlMgr(url=url))
    url = self.checkPage(url)
    name = textextract(url.data, '<title>Anime Stream ', ' - German Sub / German Dub Animestreams</title>')
    media = self.getMedia(name, link)
    if not media:
        return None
    root = html.fromstring(url.data)
    # each link to a video contains one episode
    num = 0
    for streamA in root.xpath(".//a[contains(@href,'/episode/')]"):
        num += 1
        streamLink = 'http://www.eliteanimes.com/'+streamA.get('href')
        title = streamA.text.strip()
        part = media.createSub()
        part.num = "%03d"%num
        part.name = title
        alternative = part.createSub()
        alternative.subtitle = 'German'
        alternative.language = 'German'
        alternativePart = alternative.createSub()
        alternativePart.url = streamLink
    # BUGFIX: dropped the dead statement `url = url` that sat here.
    url = link.replace('stream', 'details')
    url = UrlMgr(url=url)
    url = self.checkPage(url)
    # extract image and tags from the details page
    imgUrl = textextract(url.data, 'src="Bilder', '"')
    if imgUrl:
        media.img = 'http://www.eliteanimes.com/Bilder'+imgUrl

    def getDetailContent(data, name):
        # one labelled row of the details table, or None if absent/placeholder
        # BUGFIX: use the data parameter instead of reaching for url.data
        # through the closure, so the helper works on whatever page it is given
        content = textextract(data, '<td class="atitle2" valign="top">'+name+'</td>', '</tr>')
        if not content:
            return None
        content = textextract(content, '<td class="acontent2">', '</td>')
        # BUGFIX: was `> 0`, which missed the placeholder at position 0
        if content.find('Noch nichts eingetragen') != -1:
            return None
        return content

    year = getDetailContent(url.data, 'Jahr')
    if year:
        tmp = re.search(".*([0-9][0-9][0-9][0-9]).*", year)
        if tmp:
            media.year = int(tmp.group(1))
    for name in ("Zielgruppe", "Setting", "Genre"):
        content = getDetailContent(url.data, name)
        if content:
            tags = textextractall(content, '"><strong> ', ' </strong>')
            media.addTags(tags)
    return self.afterExtract(media)
def download(self, **kwargs):
    """Pass the hoster's advert + recaptcha gate and delegate the download
    to the real stream behind it.

    Retries up to 3 times (kwargs['retry']) on a rejected captcha / bad
    json answer. Returns the delegated download, or None on failure.
    """
    if "retry" not in kwargs:
        kwargs["retry"] = 1
    if kwargs["retry"] == 4:
        log.error("maximum number of retries reached")
        return None
    url = UrlMgr(url=self.flvUrl, nocache=True)
    # before looking at the captcha we have to look at their advertisement
    # the error msg for a wrong captcha and not looking at their advertisement is
    # the same - so if you seem to be unlucky maybe they changed something with that
    match = re.search(r'<iframe.*src="([^"]+)"', url.data)
    if not match:
        log.error("could not find the iframe with advertisement")
        return None
    log.debug("loading advertisement %s", repr(match.group(1)))
    adUrl = UrlMgr(match.group(1), nocache=True)
    adUrl.data  # fetch the advert page; its content is irrelevant
    # in theory when i give the above url a header={"referer":self.flvUrl} following would be executed too
    # but to get the actual link it is enough to just load the start page
    # redirect = textextract(adUrl.data, '<meta http-equiv="refresh" content="0; url=', '">')
    # adUrl2 = UrlMgr(redirect, header={"referer":self.flvUrl}, nocache=True)
    # redirect2 = textextract(adUrl2.data, '<form target="_parent" method="post" action="', '"')
    # adUrl3 = UrlMgr(redirect2, post={"":""}, nocache=True)
    recaptchaId = textextract(url.data, 'src="http://www.google.com/recaptcha/api/challenge?k=', '"')
    challenge, solution = solveRecaptcha(recaptchaId, referer=self.flvUrl)
    if challenge.find("&") > 0:
        challenge = textextract(challenge, "", "&")
    post = {"action": "web",
            "recaptcha_challenge_field": challenge,
            "recaptcha_response_field": solution}
    # the x-Requested-With is quite important else it doesn't work
    url = UrlMgr(url=self.flvUrl, post=post, header={"X-Requested-With": "XMLHttpRequest"}, nocache=True)
    try:
        data = json.loads(url.data[3:])
    except (ValueError, TypeError):
        # BUGFIX: was a bare `except:` that also swallowed KeyboardInterrupt
        # and SystemExit; only a json decode error (ValueError) or a missing
        # body (TypeError on the slice) is expected here.
        log.error("No json returned, showing first 200 chars:")
        log.error(url.data.replace("\n", "").replace("\r", "")[:200])
        data = {"ok": False}
    if not data["ok"]:
        kwargs["retry"] += 1
        return self.download(**kwargs)
    else:
        # the real hoster link comes back base64 encoded (python 2 codec)
        link = data["response"].decode("base64")
        log.info("found new link %s", repr(link))
        stream = getStreamByLink(link)
        return stream.download(**kwargs)
def download(self, **kwargs):
    """Follow FireDrive's confirm form and start the actual file download.

    Returns a LargeDownload, or None when the file is gone or the page
    layout changed.
    """
    if not self.flvUrl:
        raise Exception("No flv url - can't start download")
    page = UrlMgr(url=self.flvUrl, nocache=True)
    if page.data.find("This file doesn't exist, or has been removed.") > 0:
        log.info("FireDrive - file was removed")
        return None
    # the page wants a hidden confirm token posted back before it reveals the link
    confirm = textextract(page.data, 'name="confirm" value="', '"')
    if confirm is None:
        log.warning("FireDrive - could not find confirm link")
        return None
    confirmed = UrlMgr(url=self.flvUrl, post={'confirm': confirm}, nocache=True)
    link = textextract(confirmed.data, "file: 'http://dl.", "',")
    if link is None:
        log.error("Firedrive could not find link")
        return None
    kwargs['url'] = 'http://dl.'+link
    return LargeDownload(**kwargs)
def checkPage(self, url):
    """Detect eliteanimes' cookie-wall and captcha pages on *url*.

    On a cookie page the connection is reset so a fresh request (with the
    cookie set) is made; on a captcha page the user is told to solve it in
    a browser and the process exits. Returns the (possibly refreshed) url
    object.
    """
    if url.data.find('<title>How to Enable Cookies</title>') > 0:
        # reconnect and set cookie through it
        url.clear_connection()
        url.setCacheWriteOnly()
    else:
        imgUrl = textextract(url.data, 'src="/captcha/?rnd=', '"')
        if imgUrl:
            # first hit of a captcha: retry once with a fresh connection;
            # presumably url.data re-fetches after clear_connection() so the
            # second extract sees the new page - TODO confirm UrlMgr semantics
            url.clear_connection()
            url.setCacheWriteOnly()
            imgUrl = textextract(url.data, 'src="/captcha/?rnd=', '"')
            if imgUrl:
                # still a captcha after the retry: nothing automated we can do
                log.error("as i said.. a captcha")
                log.error("please visit http://www.eliteanimes.com/ and enter the captcha and you won't be bothered again")
                # TODO crack this captcha and return a new url object
                imgUrl = 'http://www.eliteanimes.com/captcha/?rnd='+imgUrl
                url = UrlMgr(url=imgUrl, cache_writeonly=True)
                import sys
                sys.exit()
    return url
def get(self):
    """Build the media tree for one DDLme page from the javascript `subcats`
    object embedded in the html.

    Returns the result of afterExtract, or None when getMedia rejects the
    extracted title.
    """
    link = self.link
    # this page is special: in it's headers it says it is iso-8859-1 but it
    # actually returns utf-8
    page = UrlMgr(url=link, encoding='utf-8')
    name = textextract(page.data, "<title>", ' » Download & Stream » DDLme</title>')
    media = self.getMedia(name, link)
    if not media:
        return None
    # the stream listing is a javascript object literal; re-append the brace
    # the extraction cut off and parse it as json
    rawStreams = textextract(page.data, '<script type="text/javascript">var subcats = ', '};')+"}"
    streams = json.loads(rawStreams)
    for sid, streamData in streams.items():
        part = media.createSub()
        if 'info' in streamData:
            info = streamData['info']
            part.season = int(info['staffel'])
            part.num = int(info['nr'])
            part.name = textextract(info['name'], "", u" »")
        for streamName, streamParts in streamData['links'].items():
            alternative = part.createSub()
            seenPartIds = set()
            for entry in streamParts:
                # entry layout: 0=partId, 1=js action, 2=icon, 3=url, 4=hoster id, 5=type
                # TODO write a system to correct this - but I guess since the
                # dataformat of them is so bad, it is better to wait until they
                # change it
                if entry[0] in seenPartIds:
                    continue
                seenPartIds.add(entry[0])
                alternativePart = alternative.createSub()
                alternativePart.url = entry[3]
    return self.afterExtract(media)
def download(self, **kwargs):
    """Ask the hoster's player api for the real file url and download it.

    When the api answer cannot be parsed, the method retries exactly once
    with the page cache invalidated (flag carried in
    kwargs['invalidate_cache']). Returns a LargeDownload or None.
    """
    if not self.flvUrl:
        raise Exception("No flv url - can't start download")
    if 'invalidate_cache' in kwargs:
        page = UrlMgr(url=self.flvUrl, nocache=True)
    else:
        page = UrlMgr(url=self.flvUrl)
    if page.data.find("This file no longer exists on our servers.") > 0:
        log.info("File was removed")
        return None
    # primary key pattern first; fall back when missing or implausibly long
    key = textextract(page.data, *self.filekeyExtract1)
    if key is None or len(key) < 1 or len(key) > 40:
        key = textextract(page.data, *self.filekeyExtract2)
    params = {
        'user': '******',
        'numOfErrors': 0,
        'key': key,
        'pass': '******',
        'cid': 'undefined',
        'file': textextract(page.data, 'flashvars.file="', '";'),
        'cid2': 'undefined',
        'cid3': 'undefined'
    }
    api = UrlMgr(url=self.url+"/api/player.api.php", params=params, nocache=True)
    if api.data[:4] == 'url=':
        kwargs['url'] = textextract(api.data, 'url=', '&title')
        return LargeDownload(**kwargs)
    log.error("could not find downloadfile %s", api.data)
    if 'invalidate_cache' in kwargs:
        # second attempt already failed - give up
        log.error("could still not find downloadfile %s", api.data)
        return None
    log.info("retry without cache")
    kwargs['invalidate_cache'] = True
    return self.download(**kwargs)
def get(self):
    """Parse one 'partlist' page into a media tree.

    Each table row with class 'link' is an episode; its 6th column holds a
    nested download table whose rows become alternatives (hoster, language,
    subtitle, size, parts). Tags and the year come from the <dt>/<dd>
    definition list at the bottom. Returns afterExtract(media), or None
    when the title is unknown or the partlist table is missing.
    """
    link = self.link
    url = UrlMgr(url=link, cookies=self.cookies, encoding='utf-8')
    name = textextract(textextract(url.data, '<h2>', '</h2>'), ' :: ', '</span>')
    media = self.getMedia(name, link)
    if not media:
        return None
    season = 0
    # there is no season information on that page :/
    # look if it is a tvshow by that string and just assume a season
    if "Anime-Serie ::" in url.data:
        season = 1
    root = html.fromstring(url.data)
    try:
        listTable = root.get_element_by_id('partlist')
    except KeyError:
        # TODO take a more specific exception
        log.error("no partlist table inside data")
        log.error(link)
        log.error(url.data)
        return None
    for row in listTable.iterfind(".//tr[@class='link']"):
        part = media.createSub()
        part.season = season
        curCol = 0
        for column in row.iterfind("td"):
            curCol += 1
            if curCol == 1:
                # episode number
                part.num = column.text
            elif curCol == 2:
                # episode title
                part.name = column.text
            elif curCol == 5:
                # download links - intentionally ignored, only streams are used
                pass
            elif curCol == 6:
                # stream links
                dlTable = column.find(".//table[@class='dltable']")
                if dlTable is None:
                    dlTable = column.find(".//table[@class='list']")
                if dlTable is None:
                    log.error("no downloadtable in %s", link)
                    continue
                # they use streamCurCol == 4 with the content "Part 1", "Part 2"
                # etc to name this but sometimes streamCurCol == 4 would be the
                # size.. so it is quite complicated
                hasMultipleParts = False
                for streamRow in dlTable.iterfind(".//tr[@class='medialink']"):
                    if hasMultipleParts:
                        rowString = etree.tostring(streamRow)
                        # create an alternative if that row has no "Part XYZ" inside it
                        # or if that row is Part1/Part 1
                        # NOTE(review): both branches of this if/else run the same
                        # call; possibly the else was meant to keep the previous
                        # alternative for follow-up parts - confirm before changing
                        if rowString.find("Part") == -1 or rowString.find("Part 1") != -1 or rowString.find("Part1") != -1:
                            alternative = part.createSub()
                    else:
                        alternative = part.createSub()
                    streamCurCol = 0
                    hasMultipleParts = False
                    for streamColumn in streamRow.iterfind("td"):
                        streamCurCol += 1
                        streamColumnString = etree.tostring(streamColumn)
                        if streamCurCol == 1:
                            # hoster icon + redirect link
                            tmp = re.search("hoster/(.*?)\.png", streamColumnString)
                            if tmp:
                                hoster = tmp.group(1)
                                alternative.hoster = hoster
                            alternativePart = alternative.createSub()
                            redirectUrl = re.search("a href=\"(.*?)\"", streamColumnString)
                            if redirectUrl:
                                alternativePart.url = redirectUrl.group(1)
                            else:
                                # a column without a link is useless - skip it
                                continue
                            flv_type = re.search('src="images/hoster/(.*?).png"', streamColumnString)
                            if flv_type:
                                alternativePart.flv_type = flv_type.group(1)
                        if streamCurCol == 2:
                            # there can exist multiple langs but i take just one
                            lang = re.search("lang/(..)\.png", streamColumnString)
                            if lang:
                                lang = lang.group(1)
                                alternative.language = getLanguage(lang, 'de')
                        if streamCurCol == 3:
                            # there can exist multiple langs but i take just one
                            lang = re.search("lang/(..)\.png", streamColumnString)
                            if lang:
                                lang = lang.group(1)
                                alternative.subtitle = getLanguage(lang)
                        if streamCurCol == 4:
                            # either the file size (int) or a "Part N" marker
                            try:
                                size = int(streamColumn.text)
                            except:
                                if streamColumn.text[:4] == "Part":
                                    # with the next part 1 we will create a new alternative
                                    hasMultipleParts = True
                                else:
                                    log.warning("This media file might have multiple parts but not sure: %s", streamColumn.text)
                            else:
                                alternativePart.size = size
    tags = []
    for i in ('Zielgruppe', 'Genres'):
        newTags = textextract(url.data, '<dt>'+i+'</dt>', '</dd>')
        if newTags:
            newTags = textextract(newTags, '<dd>', '')
            newTags = newTags.split(', ')
            tags.extend(newTags)
    year = textextract(url.data, '<dt>Jahr</dt>', '</dd>')
    year = textextract(year, '<dd>', '')
    try:
        media.year = int(year[:4])
    except ValueError:
        log.warning("Problem with year in %s", link)
    media.addTags(tags)
    return self.afterExtract(media)
def getId(self):
    """Return the video id that follows 'streamcloud.eu/' in the url."""
    marker = 'streamcloud.eu/'
    return textextract(self.flvUrl, marker, '/')
def getId(self):
    """Return everything after '.org/redirect/' in the url."""
    marker = ".org/redirect/"
    return textextract(self.flvUrl, marker, "")
def getId(self):
    """Return everything after '.com/file/' in the url."""
    marker = '.com/file/'
    return textextract(self.flvUrl, marker, '')
def getId(self):
    """Return the video id, trying the primary extraction pattern first and
    falling back to the secondary one; None when neither matches."""
    for pattern in (self.videoidExtract1, self.videoidExtract2):
        found = textextract(self.flvUrl, *pattern)
        if found is not None:
            return found
    return None
def getId(self):
    """Return everything after 'hellsmedia.com/v/' in the url."""
    marker = 'hellsmedia.com/v/'
    return textextract(self.flvUrl, marker, '')