def _get_data(self):
    '''
    Config:
    List of servers. Will use servers in order.
    For example: ["hydrax", "vidstream"] will prioritize the HydraX link.
    Available servers: links (below) and vidstream
    '''
    links = {
        "gcloud": "https://gcloud.live/",
        "mp4upload": "https://www.mp4upload.com/",
        "cloud9": "https://cloud9.to",
        "hydrax": "https://hydrax.net",
        "mixdrop": "https://mixdrop.co"
    }
    url = self.url.replace('https:////', 'https://')
    url = url.replace('https://vidstreaming.io/download',
                      'https://vidstreaming.io/server.php')
    soup = helpers.soupify(helpers.get(url))
    servers = Config._read_config()['siteconfig']['vidstream']['servers']
    linkserver = soup.select('li.linkserver')
    logger.debug('Linkserver: {}'.format(linkserver))
    for a in servers:
        if a == 'vidstream' and 'vidstream' in self.url:
            return self._get_link(soup)
        for b in linkserver:
            if b.get('data-video').startswith(links.get(a, 'None')):
                # A new Extractor gets created instead of mutating self so
                # that future loop iterations are not impacted. If the
                # extractor fails, vidstream.py would otherwise get run
                # again with a changed self.
                info = self.__dict__.copy()
                info['url'] = b.get('data-video')
                _self = Extractor(info)
                return extractors.get_extractor(a)._get_data(_self)
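# A minimal, self-contained sketch of the priority matching above. The
# pick_server helper and its toy inputs are illustrative only, not part of
# the real codebase.
def pick_server(servers, data_videos, links):
    # Walk the configured servers in priority order and return the first
    # embed URL whose prefix matches that server.
    for name in servers:
        prefix = links.get(name)
        if not prefix:
            continue
        for video in data_videos:
            if video.startswith(prefix):
                return name, video
    return None, None

# pick_server(
#     ["hydrax", "mp4upload"],
#     ["https://www.mp4upload.com/abc", "https://hydrax.net/xyz"],
#     {"hydrax": "https://hydrax.net", "mp4upload": "https://www.mp4upload.com/"},
# ) -> ("hydrax", "https://hydrax.net/xyz")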
def get_data(self):
    anime_id = self.url.split('info/')[-1].split('-')[0]
    url = self._api_url.format(anime_id)
    res = helpers.get(url)
    try:
        res = res.json()
    except Exception:
        logger.debug('Error with html {}'.format(res.text))
        raise
    base_url = 'https://www.masterani.me/anime/watch/{}'.format(
        res['info']['slug']) + '/'
    episode_urls = []
    for episode in res['episodes']:
        url = base_url + episode['info']['episode']
        episode_urls.append((episode['info']['episode'], url))
    self._episode_urls = episode_urls
    self.meta = res['info']
    self.title = self.meta['title']
    self._len = len(self._episode_urls)
    return self._episode_urls
def _get_sources(self):
    ids = self.url.split(",")
    ep = ids[0]
    realId = int(ids[0]) + int(ids[1]) + 2
    _referer = ids[2]
    realUrl = helpers.post(
        "https://kissanimefree.xyz/wp-admin/admin-ajax.php",
        referer=f"https://kissanimefree.xyz/episode/{_referer}-episode-{realId}/",
        data={"action": "kiss_player_ajax", "server": "vidcdn", "filmId": realId}
    ).text
    realUrl = realUrl if realUrl.startswith('http') else "https:" + realUrl
    txt = helpers.get(realUrl).text
    # Groups 2 and/or 3 are the vidstreaming links without https://
    # Not used because I've yet to test if goto always leads to mp4:
    # vidstream_regex = r"window\.location\s=\s(\"|').*?(vidstreaming\.io/[^(\"|')]*?)\"|(vidstreaming\.io/goto\.php[^(\"|')]*?)(\"|')"
    vidstream_regex = r"window\.location\s=\s(\"|').*?(vidstreaming\.io/[^(\"|')]*?)\""
    surl = re.search(vidstream_regex, txt)
    if surl:
        if surl.group(2):
            return [('vidstreaming', surl.group(2),)]
    return ''
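# A quick, self-contained check of the vidstream_regex above. The sample
# line is made up, but mirrors the redirect the regex targets. For context,
# a self.url of "3,4,some-slug" would yield realId 3 + 4 + 2 == 9 and the
# referer https://kissanimefree.xyz/episode/some-slug-episode-9/.
import re

sample = 'window.location = "https://vidstreaming.io/streaming.php?id=MTIz"'
vidstream_regex = r"window\.location\s=\s(\"|').*?(vidstreaming\.io/[^(\"|')]*?)\""
match = re.search(vidstream_regex, sample)
# Group 2 carries the link without the https:// scheme:
assert match.group(2) == 'vidstreaming.io/streaming.php?id=MTIz'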
def _get_sources(self):
    server = self.config.get("server", "trollvid")
    soup = helpers.soupify(helpers.get(self.url))
    hosts = json.loads(
        soup.find("div", {"class": "spatry"})
        .previous_sibling.previous_sibling.text[21:-2])["videos"]
    _type = hosts[0]["type"]
    try:
        host = list(
            filter(
                lambda video: video["host"] == server and video["type"] == _type,
                hosts))[0]
    except IndexError:
        # Fall back to the first host, but avoid mp4upload when an
        # alternative exists.
        host = hosts[0]
        if host["host"] == "mp4upload" and len(hosts) > 1:
            host = hosts[1]
    name = host["host"]
    _id = host["id"]
    link = self.getLink(name, _id)
    return [(name, link)]
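# An illustrative guess at why the [21:-2] slice works: it strips a fixed
# 21-character JS prefix and a trailing ");" around the JSON payload. The
# wrapper name below is made up; only the slice offsets come from the
# function above.
import json

blob = 'playerInstance.setup({"videos": [{"host": "trollvid", "id": "x1", "type": "sub"}]});'
hosts = json.loads(blob[21:-2])["videos"]
assert hosts[0]["host"] == "trollvid"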
def _scrape_episodes(self):
    version = self.config.get('version', 'subbed')
    soup = helpers.soupify(helpers.get(self.url))
    versions = soup.select_one('.card-body').select('ul')

    def get_links(version):
        links = [v.attrs['href'] for v in version.select('a')][::-1]
        return links

    dubbed = get_links(versions[1])
    subbed = get_links(versions[0])
    # TODO: This should be handled more gracefully.
    # Revisit once the config API is finalized.
    if version.lower() == 'dubbed':
        choice = dubbed
        other = subbed
    else:
        choice = subbed
        other = dubbed
    if choice:
        return choice
    # TODO: warn that the chosen version is not available.
    return other
def search(cls, query):
    cls.token = get_token()
    params = {
        'search': query,
        'token': cls.token
    }
    results = helpers.get('https://ani.api-web.site/advanced',
                          params=params).json()['data']  # noqa
    if 'nav' in results:
        results = results['nav']['currentPage']['items']
        search_results = [
            SearchResult(
                title=i['name'],
                url='https://shiro.is/anime/' + i['slug'],
                poster='https://ani-cdn.api-web.site/' + i['image'],
                meta_info={
                    'version_key_dubbed': '(Sub)' if i['language'] == 'subbed' else '(Dub)'  # noqa
                }
            )
            for i in results
        ]
        search_results = sorted(search_results, key=lambda x: len(x.title))
        return search_results
    else:
        return []
def _get_sources(self):
    servers = {
        'rapidvideo': '33',
        'streamango': '12',
        'mp4upload': '35',
    }
    server = self.config.get('server', 'mp4upload')
    params = {'id': self.url, 'server': servers[server], 'ts': self.ts}

    def get_stream_url(base_url, params, DD=None):
        params['_'] = int(generate_(params, DD=DD))
        data = helpers.get(base_url, params=params).json()
        return data['target']

    try:
        url = get_stream_url(self._base_url, params)
    except KeyError:
        try:
            del params['_']
            del params['ts']
            # I don't know if this is reliable or not.
            # For now it works.
            data = helpers.get('http://9anime.cloud/ajax/episode/info',
                               params=params).json()
            url = data['target']
        except Exception as e:
            raise AnimeDLError(
                '9anime probably changed their API again. Check the issues '
                'here: https://github.com/vn-ki/anime-downloader/issues. '
                'If it has not been reported yet, please open a new issue.'
            ) from e
    return [
        (server, url),
    ]
def _scrape_metadata(self):
    soup = helpers.soupify(helpers.get(self.url))
    self.title = str(
        soup.find('div', {'class': 'widget info'})
        .find('h2', {'class': 'title'}).text)
    self.image = str(
        soup.find('div', {'class': 'widget info'}).find('img')['src'])
    # self._len = int(soup.find_all(
    #     'ul', ['episodes'])[-1].find_all('a')[-1]['data-base'])
    meta1 = soup.find('div', {'class': 'widget info'}).find_all('dl')[0]
    meta2 = soup.find('div', {'class': 'widget info'}).find_all('dl')[1]
    dd = meta1.find_all('dd') + meta2.find_all('dd')
    dt = meta1.find_all('dt') + meta2.find_all('dt')
    self.meta = dict(
        zip([tag.text.strip(': ') for tag in dt],
            [tag.text.strip() for tag in dd]))
def _get_data(self):
    # This needs a JavaScript deobfuscation API in Python, so someone
    # smarter than me can work on that; for now I will add the pattern I
    # observed. Alternatively you can pattern match on `src` for the
    # stream_url part.
    source_parts_re = re.compile(
        r'action=\"([^"]+)\".*value=\"([^"]+)\".*Click Here to Download',
        re.DOTALL)
    # Kwik servers don't allow direct link access; you need to be referred
    # from somewhere, so just use the url itself.
    download_url = self.url.replace('kwik.cx/e/', 'kwik.cx/f/')
    kwik_text = helpers.get(download_url, referer=download_url).text
    post_url, token = source_parts_re.search(kwik_text).group(1, 2)
    stream_url = helpers.post(post_url,
                              referer=download_url,
                              data={'_token': token},
                              allow_redirects=False).headers['Location']
    title = stream_url.rsplit('/', 1)[-1].rsplit('.', 1)[0]
    logger.debug('Stream URL: %s' % stream_url)
    return {
        'stream_url': stream_url,
        'meta': {
            'title': title,
            'thumbnail': ''
        },
        'referer': None
    }
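# An illustrative check of source_parts_re against a made-up form snippet
# shaped like the Kwik download page the comments above describe; the URL
# and token values are placeholders.
import re

sample = (
    '<form action="https://kwik.cx/d/abc123" method="POST">'
    '<input type="hidden" name="_token" value="tok456">'
    '<button>Click Here to Download</button></form>'
)
source_parts_re = re.compile(
    r'action=\"([^"]+)\".*value=\"([^"]+)\".*Click Here to Download',
    re.DOTALL)
post_url, token = source_parts_re.search(sample).group(1, 2)
assert (post_url, token) == ('https://kwik.cx/d/abc123', 'tok456')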
def _scrape_episodes(self):
    r = helpers.get(self.url).text
    soup = helpers.soupify(r)
    # Allows fallback from both dub -> sub and sub -> dub.
    # This makes it possible to download pokemon (for example) without
    # having to change config.
    subbed = self.config['version'] != 'dubbed'
    subbed_converter = {
        True: 'div#episodes-sub',
        False: 'div#episodes-dub',
    }
    eps = soup.select_one(subbed_converter.get(subbed)).select('td > a')
    if not eps:
        logger.info('No episodes in selected language, falling back.')
        eps = soup.select_one(
            subbed_converter.get(not subbed)).select('td > a')
        if not eps:
            logger.info('No episodes found.')
            return []
    episodes = ['https://kissanimex.com' + x.get('href') for x in eps][::-1]
    return episodes
def _get_data(self):
    # TODO: Provide referer by source
    referer = 'https://anistream.xyz'
    # The token expires after ~1 minute, so caching would break future
    # requests.
    req = helpers.get(self.url, referer=referer, cache=False)
    source_regex = r'<source src="(.*?)"'
    source = re.search(source_regex, req.text)
    # Matches: token="eyJ0eXA"
    token_regex = r"token\s*=\s*['\"|']([^\"']*)"
    token = re.search(token_regex, req.text)
    if source:
        # Group 1 is the captured URL; group 0 would include the tag text.
        return {
            'stream_url': source.group(1)
        }
    elif token:
        token = token.group(1)  # something like: 084df78d215a
        trollvid_id = self.url.split('/')[-1]
        post = helpers.post(f'https://mp4.sh/v/{trollvid_id}',
                            data={'token': token},
                            referer=self.url,
                            ).json()
        # {'success': True} on success.
        if post.get('success') and post.get('file'):
            return {
                'stream_url': post['file']
            }
    # In case neither method works.
    return {'stream_url': ''}
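# Illustrative checks of the two regexes above against made-up page
# fragments; the URL and token values are placeholders.
import re

page = '<source src="https://mp4.sh/stream/abc.mp4" type="video/mp4">'
assert re.search(r'<source src="(.*?)"', page).group(1) == \
    'https://mp4.sh/stream/abc.mp4'

script = 'var token="eyJ0eXA";'
assert re.search(r"token\s*=\s*['\"|']([^\"']*)", script).group(1) == 'eyJ0eXA'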
def _get_sources(self):
    soup = helpers.soupify(helpers.get(self.url).text)
    servers = soup.select('div.server-watch#server-watch > a')
    if servers:
        servers = [x['data-link'] for x in servers]
        logger.debug(
            'Hosts: '
            + str([urllib.parse.urlparse(x).netloc for x in servers]))
    else:
        servers = soup.find_all('a', {
            'data-link': True,
            'class': 'panel-block'
        })
        servers = [x['data-link'] for x in servers]
    sources = []
    for i in servers:
        if 'clipwatching' in i:
            sources.append({
                'extractor': 'clipwatching',
                'url': i,
                'server': 'clipwatching',
                'version': '1'
            })
        elif 'streamtape' in i:
            sources.append({
                'extractor': 'streamtape',
                'url': i,
                'server': 'streamtape',
                'version': '1'
            })
    if sources:
        return self.sort_sources(sources)
    else:
        logger.error(
            'No episode source was found, file might have been deleted.')
        return
def _scrape_metadata(self):
    soup = helpers.soupify(helpers.get(self.url))
    self.title = soup.select('h1.page-title')[0].text
def _scrape_episodes(self):
    url = self.url
    soup = helpers.soupify(helpers.get(url))
    # v1 and v3 use the embedded video player.
    # v2 and v4 use a JSON POST request.
    # ALL shit below really needs refactoring!
    if '/v2/' in self.url or '/v4/' in self.url:
        # Uses the id in the url and encodes it twice.
        # NaN and N4CP9Eb6laO9N are permanent encoded variables found in
        # https://animixplay.com/assets/v4.min.js
        url_id = str.encode(self.url.split("/")[4])
        post_id = f'NaN{base64.b64encode(url_id).decode()}N4CP9Eb6laO9N'.encode()
        post_id = base64.b64encode(post_id).decode()
        data_id = 'id2' if '/v4/' in self.url else 'id'
        # In extremely rare cases the anime isn't loaded and must be
        # generated by the server first.
        try:
            data = helpers.post(
                'https://animixplay.com/raw/2ENCwGVubdvzrQ2eu4hBH',
                data={data_id: post_id}).json()  # 400 HTTPError here
        except Exception:
            if '/v4/' in self.url:
                data = helpers.post(
                    'https://animixplay.com/e4/5SkyXQULLrn9OhR',
                    data={'id': url.split('/')[-1]}).json()['epstream']
            if '/v2/' in self.url:
                data = helpers.post(
                    'https://animixplay.com/e2/T23nBBj3NfRzTQx',
                    data={'id': url.split('/')[-1]}).json()['epstream']
        logger.debug(data)
        if '/v4/' in self.url:
            # Has a list of mp4 links.
            return data
        elif '/v2/' in self.url:
            # Has an elaborate list of all metadata on episodes.
            episodes = []
            for i in data:
                info_dict = i.get('src', None)
                # Looks like mp4 is always first in the list.
                # Sometimes it returns None.
                if info_dict:
                    episodes.append(info_dict[0].get('file', ''))
                else:
                    episodes.append('')
            return episodes
    else:
        # v5 and v1 are somewhat similar.
        servers = self.config['v5-servers']
        try:
            ep_list = soup.find('div', {'id': 'epslistplace'}).get_text()
            logger.debug(ep_list)
            jdata = json.loads(ep_list)
            if '/v1/' in self.url:
                keyList = list(jdata.keys())
                del keyList[0]
                logger.debug(keyList)
                return [jdata[x] for x in keyList if '.' in jdata[x]]
            else:
                for i in servers:
                    if jdata.get(i):
                        return jdata.get(i)
                return
        except json.decoder.JSONDecodeError:
            # Link generation.
            url_dict = {'v5': '/e5/dZ40LAuJHZjuiWX', 'v1': '/e1/9DYiGVLD7ASqZ5p'}
            version = 'v5' if '/v5/' in self.url else 'v1'
            # Not sure if the v5 id works for data.
            data = helpers.post(
                'https://animixplay.to' + url_dict[version],
                data={'id': url.split('/')[-1]}).json()['epstream']
            logger.debug('Data: {}'.format(data))
            if '/v1/' in self.url:
                return [data[i] for i in data if i != 'eptotal']
            else:
                # jdata is never bound when JSON parsing fails, so use the
                # freshly generated data here.
                for i in servers:
                    if data.get(i):
                        return data.get(i)
                return
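# A worked, standalone example of the v2/v4 double base64 id encoding used
# above; the url id b'12345' is made up, while the 'NaN' prefix and
# 'N4CP9Eb6laO9N' suffix come from the function itself.
import base64

url_id = b'12345'
inner = f'NaN{base64.b64encode(url_id).decode()}N4CP9Eb6laO9N'.encode()
post_id = base64.b64encode(inner).decode()
# inner   == b'NaNMTIzNDU=N4CP9Eb6laO9N'
# post_id == 'TmFOTVRJek5EVT1ONENQOUViNmxhTzlO'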
def _scrape_episodes(self):
    # Only uses the direct download links for consistency.
    soup = helpers.soupify(helpers.get(self.url))
    elements = soup.select('article.post a')
    return [i.get('href') for i in elements if 'Direct Download' in i.text]
def _scrape_metadata(self):
    self.title = helpers.soupify(
        helpers.get(self.url).text).select_one("span.animetitle").get_text()
def _scrape_metadata(self):
    soup = helpers.soupify(helpers.get(self.url))
    self.title = soup.h1.text
def _scrape_metadata(self):
    self.title = helpers.soupify(helpers.get(
        self.url)).select('h1.title')[0].text
def _scrape_metadata(self):
    soup = helpers.soupify(helpers.get(self.url))
    self.title = soup.select('div.amin_week_box_up1 > h1')[0].text
def bypass(self):
    host = "https://erai-raws.info"
    resp = helpers.get("https://check.ddos-guard.net/check.js").text
    ddosBypassPath = re.search("'(.*?)'", resp).groups()[0]
    helpers.get(host + ddosBypassPath)
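# Illustrative only: the regex grabs the first single-quoted string from
# check.js. The sample body below is a guess at the script's shape, not the
# real DDoS-Guard response.
import re

sample_js = "new Image().src = '/.well-known/ddos-guard/check?id=abc';"
path = re.search("'(.*?)'", sample_js).groups()[0]
assert path == '/.well-known/ddos-guard/check?id=abc'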
def _scrape_episodes(self):
    soup = helpers.soupify(helpers.get(
        self.url)).select('div.episode_list > a')
    return ['https:' + a.get('href') for a in soup[::-1]]
def _scrape_metadata(self):
    self.title = helpers.soupify(
        helpers.get(self.url)).select('div.page-title > h1')[0].text
def _get_url(self, url):
    # The links are hidden on other pages.
    soup = helpers.soupify(helpers.get(url))
    return soup.select('iframe')[-1].get('src')
def _scrape_metadata(self):
    realUrl = self.url.replace("_anime", "kissanime")
    soup = helpers.soupify(helpers.get(realUrl)).select('div.film > h1')
    self.title = soup[0].text
def _scrape_metadata(self):
    soup = helpers.soupify(helpers.get(self.url))
    self.title = soup.find("h1").find("span").text
def _get_sources(self):
    soup = helpers.soupify(helpers.get(self.url))
    soup = helpers.soupify(helpers.get(soup.iframe.get("src")))
    return [("no_extractor", soup.source.get("src"))]
def _scrape_metadata(self):
    soup = helpers.soupify(helpers.get(self.url))
    self.title = soup.select_one('.card-header > h1').text
def _get_sources(self):
    r = helpers.get(self.url).text
    link = re.search(r'"file":"(.*?)"', r)[1]
    return [('no_extractor', link)]
def _scrape_metadata(self):
    soup = helpers.soupify(helpers.get(self.url).text)
    self.title = soup.select_one('h2.Title').text
def get_token():
    r = helpers.get('https://shiro.is').text
    script = 'https://shiro.is' + re.search(
        r'src\=\"(\/static\/js\/main\..*?)\"', r)[1]  # noqa
    script = helpers.get(script).text
    token = re.search(r'token\:\"(.*?)\"', script)[1]
    return token
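# Illustrative checks of the two regexes above; the HTML and JS fragments
# are fabricated to mirror what the scrape expects, with placeholder
# chunk-hash and token values.
import re

html = '<script src="/static/js/main.1a2b3c.chunk.js"></script>'
assert re.search(r'src\=\"(\/static\/js\/main\..*?)\"', html)[1] == \
    '/static/js/main.1a2b3c.chunk.js'

js = 'e.exports={token:"abc123"}'
assert re.search(r'token\:\"(.*?)\"', js)[1] == 'abc123'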