def map_first_case(self, data):
    """For the first case, get the real link and extract the episode number.

    ``data`` is an ``(input_title, link)`` pair; every digit in the title
    is concatenated to form the episode number.
    """
    input_title, link = data
    digits = ''.join(c for c in input_title if c.isdigit())
    try:
        episode_number = int(digits)
    except ValueError:
        # Title contained no digits at all; nothing to process.
        return
    if not self._should_process(episode_number):
        return
    self.logger.info('Processing episode {}'.format(episode_number))
    self._add_sources(episode_number, unshorten(link))
    self.logger.info('Done Processing episode {}'.format(episode_number))
def map_second_case(self, episode_link):
    """For the second, get the page (below this method) and then extract
    the links from that page.
    """
    # The sibling string is formatted as '\s+Episode (\d+)\s+'; keep the digits.
    episode_identifier = ''.join(
        x for x in episode_link.previous_sibling.strip() if x.isdigit()
    )
    try:
        episode_number = int(episode_identifier)
    except ValueError:
        # Fix: was a bare `except:` which also swallowed KeyboardInterrupt
        # and hid real bugs; int('') on a digit-less sibling is the only
        # expected failure here.
        return None
    if not self._should_process(episode_number):
        return
    self.logger.info('Processing episode {}'.format(episode_number))
    # The links are shortened there too..
    episode_link = self._unshorten(episode_link.get('href'))
    self._add_sources(episode_number, unshorten(episode_link))
    self.logger.info('[AnimeChiby] Finished processing episode {}'.format(
        episode_number))
def page_worker(self, data):
    """Fetch one episode page and register every embedded source that
    matches the requested language version.

    ``data`` is a ``(version, id, number)`` tuple: the language tag to
    keep, the anime path (prefixed with '/anime'), and the episode number.
    """
    version, anime_id, number = data  # renamed: `id` shadowed the builtin
    number = int(number)
    if not self._should_process(number):
        return
    self.logger.debug('Processing episode {}'.format(number))
    anime_id = anime_id[7:]  # Remove '/anime'
    url = 'http://rawranime.tv/watch/{}/episode-{}'.format(anime_id, number)
    soup = self._get(url)
    elements = soup.find_all(
        lambda x: x.name == 'div' and x.has_attr('data-src')
        and x.has_attr('data-quality'))
    for element in elements:
        # The filter above only guarantees data-src/data-quality, so
        # 'data-lang' may be missing; guard before calling .lower().
        lang = element.get('data-lang')
        if lang is None or lang.lower() != version.lower():
            continue
        quality = int(''.join(
            x for x in element.get('data-quality') if x.isdigit()))
        src = element.get('data-src')
        self.logger.debug('Unshortening {}, quality {}p'.format(
            src, quality))
        self._add_sources(number, unshorten(src, quality=quality))
    self.logger.debug('Done processing episode {}'.format(number))
def _episode_worker(self, url):
    """Process a single episode page: read its number, then add its
    download source.
    """
    page = self._get(url)
    ep_input = page.select('#default_ep')[0]
    episode_number = int(ep_input.get('value'))
    if not self._should_process(episode_number):
        return
    self.logger.info('Processing episode {}'.format(episode_number))
    anchor = page.select('div.download-anime > a')[0]
    self._add_sources(episode_number, unshorten(anchor.get('href')))
    self.logger.info('Done processing episode {}'.format(episode_number))
def _episode_worker(self, url):
    """Read the episode number out of *url*, then register the source
    behind the page's video iframe.
    """
    match = re.search(r'/s/\d+/episode/(\d{1,3})', url)
    episode_number = int(match.group(1))
    if not self._should_process(episode_number):
        return
    self.logger.info('Processing episode {}'.format(episode_number))
    page = self._get(url)
    iframe = page.find('iframe', {'id': 'ep-video'})
    self._add_sources(episode_number, unshorten(iframe.get('src')))
    self.logger.info('Done processing episode {}'.format(episode_number))
def _parse_source(self, identifier, url, soup):
    """Decode the obfuscated embed script in *soup* and add its iframe
    source under *identifier*.

    The page hides its player in a ``document.write(unescape(...))``
    script whose payload is URL-quoted and salted with junk characters.
    """
    # Fix: the '.' was unescaped, so the pattern matched any character
    # there; anchor on the literal 'document.write(' instead.
    script = soup.find(
        'script', text=re.compile(r'^document\.write\(unescape'))
    if script is None:
        self.logger.warning('Could not process {}'.format(url))
        return
    # Strip the salt characters, then URL-unquote to recover the HTML.
    embed_js = urllib.parse.unquote(
        re.sub(r'([A-Z\~\!\@\#\$\*\{\}\[\]\-\+\.])', '', script.text))
    iframe_src = BeautifulSoup(
        embed_js, 'html.parser').find('iframe').get('src')
    self._add_sources(identifier, unshorten(iframe_src))
def _source_worker(self, referer, t, sec, source):
    """Resolve one ice-player source element and register its host url.

    Posts the player's Ajax endpoint, retrying (with a short random
    sleep) while the server answers with an empty body, then peels the
    doubly nested '?url=' redirect out of the response.
    """
    # The onclick attribute wraps the id in a 3-char prefix and a
    # trailing character; renamed from `id` (shadowed the builtin).
    source_id = source.get('onclick')[3:-1]
    # Both of these are invariant across retries, so build them once.
    source_url = self._get_url() + (
        '/membersonly/components/com_iceplayer/video.'
        'phpAjaxResp.php?s={}&t={}').format(source_id, t)
    headers = {'Referer': referer}
    while True:
        payload = {
            'id': source_id,
            's': str(random.randint(10000, 10060)),
            'iqs': '',
            'url': '',
            'm': str(random.randint(10000, 10500)),
            'cap': ' ',
            'sec': sec,
            't': t
        }
        response, soup = self._post(source_url, data=payload,
                                    return_response=True, headers=headers)
        if len(response.text) > 0:
            break
        # Empty body: back off briefly before retrying.
        time.sleep(random.randint(1, 3))
    # The url is something like 'blah?url=some_url?url=the url'.
    encoded_sub_url = urllib.parse.parse_qsl(
        urllib.parse.urlparse(
            response.text).query)[0][1]  # First item, then value
    host_url = urllib.parse.parse_qsl(
        urllib.parse.urlparse(encoded_sub_url).query)[0][1]
    try:
        self.sources.add_sources(unshorten(host_url))
    except Exception:
        # Fix: was a bare `except: pass`, which also caught
        # KeyboardInterrupt/SystemExit and hid every failure. Keep the
        # best-effort behaviour but log what we dropped.
        self.logger.warning('Could not add sources for {}'.format(host_url))
def extract(self, result):
    """Collect and register the embed url of every episode of the show
    at *result*.

    *result* is a show url ending in '-<numeric id>/'; that id feeds the
    site's episode-list and embed Ajax endpoints.
    """
    show_id = re.search(r'/.+?-(\d+)/$', result).group(1)  # was `id` (builtin)
    soup = self._get(
        'http://123movies.to/ajax/v2_get_episodes/{}'.format(show_id))
    urls = []
    for anchor in soup.find_all('a', {'class': 'btn-eps'}):
        episode_id = anchor.get('episode-id')
        embed_url = requests.get(
            'http://123movies.to/ajax/load_embed/' + episode_id
        ).json()['embed_url']
        urls.append((episode_id, embed_url))
    # Fix: removed the dead `data = []` local the original never used.
    for episode_id, url in urls:
        if url != '':
            try:
                self._add_sources(unshorten(url))
            except Exception:
                # Fix: narrowed from a bare `except:`; unresolvable
                # embeds are skipped on a best-effort basis.
                pass
def episode_worker(self, link):
    """Extract the available sources from a link to an episode."""
    episode_number_search = re.search(r'http://.+-episode-([0-9]+)', link)
    if not episode_number_search:
        return
    episode_number = int(episode_number_search.group(1))
    if not self._should_process(episode_number):
        return
    self.logger.info('Processing episode {}'.format(episode_number))
    soup = self._get(link)
    download_div = soup.find('div', {'class': 'download_feed_link'})
    if not download_div:
        return
    download_links = [(x.find('span'), x.get('href'))
                      for x in download_div.find_all('a')]
    for quality_span, url in download_links:
        # For certain videos, the download link is available on the
        # website. We can directly fetch those links.
        if quality_span is not None:
            quality = int(''.join(x for x in quality_span.text
                                  if x.isdigit()))
            # Fix: the original called a non-existent `_add_source` with
            # an undefined name `source` (NameError at runtime) and never
            # used `quality`. Route the link through the quality-aware
            # unshorten call, matching the sibling extractors' pattern.
            self._add_sources(episode_number,
                              unshorten(url, quality=quality))
            continue
        # Else, we just try to use our unshortener
        self._add_sources(episode_number, unshorten(url))
    self.logger.info('Done processing episode {}'.format(episode_number))
def episode_worker(self, data):
    """Resolve every mirror of one episode and register its sources.

    ``data`` is a ``(slug, episode_details)`` pair; the mirror list is
    scraped out of the page's inline ``var args = {...}`` javascript.
    """
    slug, episode_details = data
    number = int(episode_details['episode'])
    if not self._should_process(number):
        return
    self.logger.info('Processing episode {}'.format(number))
    url = 'http://www.masterani.me/anime/watch/{}/{}'.format(slug, number)
    page_text = self.session.get(url).text
    raw_mirrors = re.search(
        r'var args = {[\s\S\n]+mirrors:[\s\S\n]+(\[.+?\]),[\s\S\n]+episode'
        r':', page_text
    ).group(1).strip().replace('\n', '')
    for mirror in json.loads(raw_mirrors):
        # Hosts may carry null prefixes/suffixes; treat those as empty.
        prefix = mirror['host']['embed_prefix'] or ''
        suffix = mirror['host']['embed_suffix'] or ''
        url = prefix + mirror['embed_id'] + suffix
        self.logger.debug('Found mirror source: {}'.format(url))
        self._add_sources(number, unshorten(url, quality=mirror['quality']))
    self.logger.info('Done processing episode {}'.format(number))
def run_unshortener(url, quality):
    """Unshorten *url* at the given quality and pretty-print the result."""
    pprint.pprint(unshorten(url, quality=quality), indent=4)