Exemplo n.º 1
0
 def _torrent_urls(self, url):
     '''Yield the absolute url of every page link matching RE_URL_MAGNET.

     Nothing is yielded when `Browser.open` returns a falsy value for `url`.
     '''
     page = Browser()
     if not page.open(url):
         return
     for magnet in page.links(url_regex=RE_URL_MAGNET):
         yield magnet.absolute_url
Exemplo n.º 2
0
 def _get_thumbnail_url(self, url):
     '''Return the `src` of the first image inside `.movie_poster_area`.

     Returns None when no such image exists, when the image has no `src`
     attribute, or when the `src` matches RE_INVALID_IMG.
     '''
     browser = Browser()
     browser.open(url)
     img_ = browser.cssselect('.movie_poster_area img')
     if not img_:
         return None

     src = img_[0].get('src')
     # An <img> without a src attribute yields None here; searching None
     # with a regex would raise TypeError, so bail out first.
     if src is None:
         return None
     if RE_INVALID_IMG.search(src):
         return None
     return src
Exemplo n.º 3
0
    def _get_media_info(self, url):
        '''Collect artist and genre information from the page at `url`.

        Returns a dict that may contain:
          - 'artist': cleaned text of the first `.band_name` element
          - 'genre': list of cleaned, comma-separated entries taken from the
            first `.product_genre .data` element

        A key is omitted when its element is not found; 'genre' is also
        omitted when the element carries no text.
        '''
        browser = Browser()
        browser.open(url)

        info = {}

        band_ = browser.cssselect('.band_name')
        if band_:
            # NOTE(review): .text may be None for an empty element; this
            # relies on clean() accepting that - confirm.
            info['artist'] = clean(band_[0].text, 1)

        genre_ = browser.cssselect('.product_genre .data')
        if genre_:
            genres = genre_[0].text
            # .text is None when the element has no leading text, and
            # None has no split(); skip the key instead of crashing.
            if genres is not None:
                info['genre'] = [clean(g, 1) for g in genres.split(',')]

        return info
Exemplo n.º 4
0
def _get_collection(url):
    '''Return the `name` attribute of the first checkbox in each row of
    `table#r2` on the page at `url`.

    Rows containing a `th` cell and rows without a checkbox are skipped.
    A checkbox with a missing or empty name is logged as an error and
    skipped.
    '''
    page = Browser()
    page.open(url)

    names = []
    for row in page.cssselect('table#r2 tr', []):
        # Header rows carry no reference
        if row.cssselect('th'):
            continue

        checkboxes = row.cssselect('input[type="checkbox"]')
        if not checkboxes:
            continue

        first_box = checkboxes[0]
        name = first_box.get('name')
        if name:
            names.append(name)
        else:
            logger.error('failed to get reference from %s', html.tostring(first_box, pretty_print=True))

    return names
Exemplo n.º 5
0
    def _mirror_urls(self, url):
        '''Iterate over mirror urls.

        Yields the `href` of the first `dt a` anchor of every
        `div.download dl` entry on the page, skipping entries whose `dd`
        markup matches RE_SPONSORED_LINK and hrefs starting with '/'.

        When the page has no entries, nothing is yielded; an error is
        logged unless the page shows a `div.error` whose text matches
        RE_ERROR.
        '''
        browser = Browser()
        browser.open(url)
        results = browser.cssselect('div.download dl')
        if not results:
            errors = browser.cssselect('div.error')
            # .text is None for an element without leading text; fall back
            # to an empty string so the regex search cannot raise TypeError.
            if not (errors and RE_ERROR.search(errors[0].text or '')):
                logger.error('failed to get mirror urls from %s', url)
            return

        for result in results:
            # Skip sponsored links
            res = result.cssselect('dd')
            if res and RE_SPONSORED_LINK.search(html.tostring(res[0])):
                continue

            links = result.cssselect('dt a')
            if not links:
                continue

            mirror_url = links[0].get('href')
            # An anchor without an href attribute yields None; ignore it
            # rather than failing on None.startswith().
            if mirror_url is None:
                continue
            if not mirror_url.startswith('/'):
                yield mirror_url
Exemplo n.º 6
0
def _get_download_url(url):
    '''Return the first quoted http:// link found in the `.download_link`
    element of a mediafire page.

    Raises FilestubeError when the host has no 'mediafire' label, when the
    page shows a captcha form or an error title, or when no link is found.
    '''
    browser = Browser()
    host_labels = urlparse(url).netloc.split('.')

    # Only mediafire pages are handled
    if 'mediafire' not in host_labels:
        raise FilestubeError('failed to get download url from %s' % url)

    browser.open(url)
    if browser.cssselect('#form_captcha'):
        raise FilestubeError('failed to get url from %s: captcha' % url)

    error_titles = browser.cssselect('.error_msg_title')
    if error_titles:
        raise FilestubeError('failed to get download url from %s: %s' % (url, error_titles[0].text))

    download_links = browser.cssselect('.download_link')
    if download_links:
        markup = html.tostring(download_links[0])
        found = re.search(r'"(http://.*?)"', markup)
        if found:
            return found.group(1)

    raise FilestubeError('failed to get download url from %s' % url)
Exemplo n.º 7
0
 def _get_torrent_url(self, url):
     '''Return the `href` of the first anchor titled "Magnet Link".

     Returns None when `Browser.open` returns a falsy value or when the
     page has no such anchor.
     '''
     page = Browser()
     if not page.open(url):
         return None
     anchors = page.cssselect('a[title="Magnet Link"]')
     return anchors[0].get('href') if anchors else None
Exemplo n.º 8
0
 def _get_torrent_url(self, url):
     '''Return the `href` of the first `a.btn-magnet` anchor on the page.

     Returns None when `Browser.open` returns a falsy value or when the
     page has no such anchor.
     '''
     page = Browser()
     if not page.open(url):
         return None
     anchors = page.cssselect('a.btn-magnet')
     return anchors[0].get('href') if anchors else None