Example #1
    def map_first_case(self, data):
        '''For the first case, get the real link and extract the episode
        number.'''

        input_title, link = data

        try:
            episode_number = int(''.join(
                c for c in input_title if c.isdigit()
            ))
        except ValueError:
            return

        if not self._should_process(episode_number):
            return

        self.logger.info('Processing episode {}'.format(
            episode_number
        ))

        self._add_sources(episode_number, unshorten(link))

        self.logger.info('Done Processing episode {}'.format(
            episode_number
        ))
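Note that joining every digit in the title is fragile: a title like 'Episode 12 v2' would parse as 122. A minimal sketch of a stricter alternative that takes only the first run of digits (the title format and the helper name are assumptions):

    import re

    def parse_episode_number(title):
        # Grab the first standalone digit run, e.g. 'Episode 12 v2' -> 12.
        match = re.search(r'\d+', title)
        return int(match.group(0)) if match else None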
Example #2
    def map_second_case(self, episode_link):
        '''For the second case, get the page (below this method) and then
        extract the links from that page.'''

        # The string is formatted as '\s+Episode (\d+)\s+'
        episode_identifier = ''.join(
            x for x in (episode_link.previous_sibling.strip())
            if x.isdigit()
        )

        try:
            episode_number = int(episode_identifier)
        except ValueError:
            return

        if not self._should_process(episode_number):
            return

        self.logger.info('Processing episode {}'.format(
            episode_number
        ))

        # The links are shortened there too.
        episode_link = self._unshorten(
            episode_link.get('href')
        )

        self._add_sources(episode_number, unshorten(episode_link))

        self.logger.info('[AnimeChiby] Finished processing episode {}'.format(
            episode_number
        ))
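Since the sibling text is documented above as matching '\s+Episode (\d+)\s+', the number could also be captured with that pattern directly rather than by filtering digits. A small sketch (the sample string is hypothetical):

    import re

    text = '  Episode 7  '  # hypothetical previous_sibling content
    match = re.search(r'Episode (\d+)', text)
    episode_number = int(match.group(1)) if match else None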
Example #3
    def page_worker(self, data):
        version, anime_id, number = data
        number = int(number)

        if not self._should_process(number):
            return

        self.logger.debug('Processing episode {}'.format(number))

        anime_id = anime_id[7:]  # Strip the '/anime/' prefix

        url = 'http://rawranime.tv/watch/{}/episode-{}'.format(anime_id, number)
        soup = self._get(url)

        elements = soup.find_all(lambda x: x.name == 'div' and x.has_attr(
            'data-src') and x.has_attr('data-quality'))

        for element in elements:
            if element.get('data-lang').lower() != version.lower():
                continue

            quality = int(''.join(x for x in element.get('data-quality')
                                  if x.isdigit()))

            src = element.get('data-src')

            self.logger.debug('Unshortening {}, quality {}p'.format(
                src, quality))

            self._add_sources(number, unshorten(src, quality=quality))

        self.logger.debug('Done processing episode {}'.format(number))
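The lambda passed to find_all can be written more compactly with BeautifulSoup's attribute shorthand, where a value of True matches any tag that has the attribute; the attrs dict is needed because the attribute names contain hyphens:

    elements = soup.find_all('div', attrs={'data-src': True,
                                           'data-quality': True})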
Example #4
    def _episode_worker(self, url):
        soup = self._get(url)
        episode_number = int(soup.select('#default_ep')[0].get('value'))

        if not self._should_process(episode_number):
            return

        self.logger.info('Processing episode {}'.format(episode_number))

        self._add_sources(episode_number, unshorten(
            soup.select('div.download-anime > a')[0].get('href')
        ))

        self.logger.info('Done processing episode {}'.format(episode_number))
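bs4 also provides select_one, which returns the first CSS match directly, so the [0] indexing can be dropped (equivalent when the element is present):

    episode_number = int(soup.select_one('#default_ep').get('value'))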
Example #5
    def _episode_worker(self, url):
        episode_number = int(
            re.search(r'/s/\d+/episode/(\d{1,3})', url).group(1))

        if not self._should_process(episode_number):
            return

        self.logger.info('Processing episode {}'.format(episode_number))

        soup = self._get(url)
        src = soup.find('iframe', {'id': 'ep-video'}).get('src')

        self._add_sources(episode_number, unshorten(src))
        self.logger.info('Done processing episode {}'.format(episode_number))
Example #6
    def _parse_source(self, identifier, url, soup):
        script = soup.find('script',
                           text=re.compile(r'^document\.write\(unescape'))

        if script is None:
            self.logger.warning('Could not process {}'.format(url))
            return

        embed_js = urllib.parse.unquote(
            re.sub(r'([A-Z\~\!\@\#\$\*\{\}\[\]\-\+\.])', '', script.text))

        iframe_src = BeautifulSoup(embed_js,
                                   'html.parser').find('iframe').get('src')

        self._add_sources(identifier, unshorten(iframe_src))
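To illustrate the deobfuscation step: the page wraps a percent-encoded iframe in document.write(unescape(...)), and urllib.parse.unquote undoes that encoding. A sketch with a made-up payload:

    import urllib.parse

    encoded = '%3Ciframe%20src%3D%22http%3A//example.com/embed%22%3E%3C/iframe%3E'
    print(urllib.parse.unquote(encoded))
    # <iframe src="http://example.com/embed"></iframe>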
Example #7
    def _source_worker(self, referer, t, sec, source):
        # The numeric id is embedded in the onclick handler.
        source_id = source.get('onclick')[3:-1]

        success = False

        while not success:
            source_url = self._get_url() + (
                '/membersonly/components/com_iceplayer/video.'
                'phpAjaxResp.php?s={}&t={}').format(source_id, t)

            payload = {
                'id': source_id,
                's': str(random.randint(10000, 10060)),
                'iqs': '',
                'url': '',
                'm': str(random.randint(10000, 10500)),
                'cap': ' ',
                'sec': sec,
                't': t
            }

            headers = {'Referer': referer}

            response, soup = self._post(source_url,
                                        data=payload,
                                        return_response=True,
                                        headers=headers)

            if len(response.text) > 0:
                success = True
            else:
                time.sleep(random.randint(1, 3))

        # The url is something like 'blah?url=some_url?url=the url'.
        encoded_sub_url = urllib.parse.parse_qsl(
            urllib.parse.urlparse(
                response.text).query)[0][1]  # First item, then value

        host_url = urllib.parse.parse_qsl(
            urllib.parse.urlparse(encoded_sub_url).query)[0][1]

        try:
            self.sources.add_sources(unshorten(host_url))
        except Exception:
            # Skip hosts the unshortener cannot resolve.
            pass
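The two parse_qsl passes unwrap one level of URL nesting at a time. A worked example with a hypothetical response body:

    import urllib.parse

    text = 'http://a.example/?url=http%3A//b.example/%3Furl%3Dhttp%3A//host.example/video'
    sub = urllib.parse.parse_qsl(urllib.parse.urlparse(text).query)[0][1]
    # sub == 'http://b.example/?url=http://host.example/video'
    host = urllib.parse.parse_qsl(urllib.parse.urlparse(sub).query)[0][1]
    # host == 'http://host.example/video'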
Example #8
    def extract(self, result):
        result_id = re.search(r'/.+?-(\d+)/$', result).group(1)
        soup = self._get(
            'http://123movies.to/ajax/v2_get_episodes/{}'.format(result_id))

        urls = [(x.get('episode-id'),
                 requests.get('http://123movies.to/ajax/load_embed/' +
                              x.get('episode-id')).json()['embed_url'])
                for x in soup.find_all('a', {'class': 'btn-eps'})]

        for episode_id, url in urls:
            if url != '':
                try:
                    self._add_sources(unshorten(url))
                except Exception:
                    # Ignore links the unshortener cannot resolve.
                    pass
Example #9
    def episode_worker(self, link):
        '''Extract the available sources from a link to an episode.'''

        episode_number_search = re.search(r'http://.+-episode-([0-9]+)', link)

        if not episode_number_search:
            return

        episode_number = int(episode_number_search.group(1))

        if not self._should_process(episode_number):
            return

        self.logger.info('Processing episode {}'.format(episode_number))

        soup = self._get(link)

        download_div = soup.find('div', {'class': 'download_feed_link'})

        if not download_div:
            return

        download_links = [(x.find('span'), x.get('href'))
                          for x in download_div.find_all('a')]

        for quality_span, url in download_links:
            # For certain videos, the download link is available on the
            # website. We can directly fetch those links.
            if quality_span is not None:
                quality = int(''.join(x for x in quality_span.text
                                      if x.isdigit()))

                # Assumed (quality, url) format for direct source entries.
                self._add_sources(episode_number, [(quality, url)])
                continue

            # Else, we just try to use our unshortener
            self._add_sources(episode_number, unshorten(url))

        self.logger.info('Done processing episode {}'.format(episode_number))
Example #10
    def episode_worker(self, data):
        slug, episode_details = data

        number = int(episode_details['episode'])

        if not self._should_process(number):
            return

        self.logger.info('Processing episode {}'.format(number))

        url = 'http://www.masterani.me/anime/watch/{}/{}'.format(slug, number)

        mirrors = json.loads(re.search(
            r'var args = {[\s\S]+mirrors:[\s\S]+(\[.+?\]),[\s\S]+episode:',
            self.session.get(url).text
        ).group(1).strip().replace('\n', ''))

        for mirror in mirrors:
            prefix = mirror['host']['embed_prefix']
            suffix = mirror['host']['embed_suffix']

            if not prefix:
                prefix = ''

            if not suffix:
                suffix = ''

            url = prefix + mirror['embed_id'] + suffix

            self.logger.debug('Found mirror source: {}'.format(url))

            sources = unshorten(url, quality=mirror['quality'])
            self._add_sources(number, sources)

        self.logger.info('Done processing episode {}'.format(number))
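The two fall-back branches for prefix and suffix could also be collapsed with or, which substitutes '' for any falsy value:

    prefix = mirror['host']['embed_prefix'] or ''
    suffix = mirror['host']['embed_suffix'] or ''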
Example #11
def run_unshortener(url, quality):
    sources = unshorten(url, quality=quality)
    pprint.pprint(sources, indent=4)
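A hypothetical invocation of the helper above (the URL and quality are placeholders):

    run_unshortener('http://example.com/some-shortened-link', 720)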