Example #1
    def url_rewrite(self, task, entry):
        if 'url' not in entry:
            log.error("Didn't actually get a URL...")
        else:
            url = entry['url']
            log.debug("Got the URL: %s" % entry['url'])
            rawdata = ""
            try:
                request = urllib2.Request(url)
                response = urllib2.urlopen(request)
            except Exception as e:
                raise UrlRewritingError("Connection Error for %s : %s" % (url, e))
            rawdata = response.read()

            match = re.search(r"<a href=\"/torrents/download/\?id=(\d*?)\">.*\.torrent</a>", rawdata)
            if match:
                torrent_id = match.group(1)
                log.debug("Got the Torrent ID: %s" % torrent_id)
                entry['url'] = 'http://www.t411.me/torrents/download/?id=' + torrent_id
                if 'download_auth' in list(entry):
                    auth_handler = t411Auth(entry['download_auth'][0],
                                            entry['download_auth'][1])

                    entry['download_auth'] = auth_handler
            else:
                raise UrlRewritingError("Cannot find torrent ID")
Example #2
    def url_rewrite(self, task, entry):
        if 'url' not in entry:
            log.error("Didn't actually get a URL...")
        else:
            url = entry['url']
            log.debug("Got the URL: %s" % entry['url'])
            rawdata = ""
            try:
                opener = urllib2.build_opener()
                opener.addheaders = [('User-agent', 'Mozilla/5.0')]
                response = opener.open(url)
            except Exception as e:
                raise UrlRewritingError("Connection Error for %s : %s" %
                                        (url, e))
            rawdata = response.read()

            match = re.search(
                r"<a href=\"/torrents/download/\?id=(\d*?)\">.*\.torrent</a>",
                rawdata)
            if match:
                torrent_id = match.group(1)
                log.debug("Got the Torrent ID: %s" % torrent_id)
                entry['url'] = 'http://www.t411.in/torrents/download/?id=' + torrent_id
                if 'download_auth' in entry:
                    auth_handler = t411Auth(*entry['download_auth'])
                    entry['download_auth'] = auth_handler
            else:
                raise UrlRewritingError("Cannot find torrent ID")
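Examples #1 and #2 fetch the details page with urllib2 and then scrape the numeric torrent ID out of the HTML with a regular expression. A minimal sketch of the same extraction using the requests library (a rewrite for illustration only, not code from the plugins above; the regex and download URL are taken from Example #1, and a plain RuntimeError stands in for UrlRewritingError):

    import re
    import requests

    T411_DOWNLOAD_URL = 'http://www.t411.me/torrents/download/?id='

    def extract_torrent_url(url):
        # Fetch the details page; surface any network failure to the caller.
        try:
            rawdata = requests.get(url).text
        except requests.RequestException as e:
            raise RuntimeError('Connection Error for %s : %s' % (url, e))
        # Same pattern as Example #1: the numeric id of the .torrent link.
        match = re.search(r'<a href="/torrents/download/\?id=(\d*?)">.*\.torrent</a>', rawdata)
        if not match:
            raise RuntimeError('Cannot find torrent ID')
        return T411_DOWNLOAD_URL + match.group(1)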
Example #3
    def parse_download_page(self, url):
        if 'newpct1.com' in url:
            log.verbose('Newpct1 URL: %s', url)
            url = url.replace('newpct1.com/', 'newpct1.com/descarga-torrent/')
        else:
            log.verbose('Newpct URL: %s', url)

        try:
            page = requests.get(url)
        except requests.exceptions.RequestException as e:
            raise UrlRewritingError(e)
        try:
            soup = get_soup(page.text)
        except Exception as e:
            raise UrlRewritingError(e)

        if 'newpct1.com' in url:
            torrent_id_prog = re.compile(r'descargar-torrent/(.+)/')
            torrent_ids = soup.findAll(href=torrent_id_prog)
        else:
            torrent_id_prog = re.compile(r"'(?:torrentID|id)'\s*:\s*'(\d+)'")
            torrent_ids = soup.findAll(text=torrent_id_prog)

        if len(torrent_ids) == 0:
            raise UrlRewritingError('Unable to locate torrent ID from url %s' %
                                    url)

        if 'newpct1.com' in url:
            torrent_id = torrent_id_prog.search(
                torrent_ids[0]['href']).group(1)
            return 'http://www.newpct1.com/download/%s.torrent' % torrent_id
        else:
            torrent_id = torrent_id_prog.search(torrent_ids[0]).group(1)
            return 'http://www.newpct.com/torrents/{:0>6}.torrent'.format(
                torrent_id)
Example #4
    def url_rewrite(self, task, entry):
        url = entry['url']
        page = None
        for (scheme, netloc) in EZTV_MIRRORS:
            try:
                _, _, path, params, query, fragment = urlparse(url)
                url = urlunparse(
                    (scheme, netloc, path, params, query, fragment))
                page = task.requests.get(url).content
            except RequestException as e:
                log.debug('Eztv mirror `%s` seems to be down', url)
                continue
            break

        if not page:
            raise UrlRewritingError('No mirrors found for url %s' %
                                    entry['url'])

        log.debug('Eztv mirror `%s` chosen', url)
        try:
            soup = get_soup(page)
            mirrors = soup.find_all(
                'a', attrs={'class': re.compile(r'download_\d')})
        except Exception as e:
            raise UrlRewritingError(e)

        log.debug('%d torrent mirrors found', len(mirrors))

        if not mirrors:
            raise UrlRewritingError(
                'Unable to locate download link from url %s' % url)

        entry['urls'] = [m.get('href') for m in mirrors]
        entry['url'] = mirrors[0].get('href')
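Example #4 swaps the scheme and host of the original URL against each entry in EZTV_MIRRORS while keeping the path and query intact, then settles on the first mirror that responds. A small standalone sketch of that netloc swap (assuming Python 3's urllib.parse and a made-up mirror list in the same (scheme, netloc) shape):

    from urllib.parse import urlparse, urlunparse

    # Hypothetical mirrors, same shape as EZTV_MIRRORS in the example above.
    MIRRORS = [('https', 'eztv.example'), ('http', 'eztv-mirror.example')]

    url = 'https://eztv.example/ep/12345/some-show/'
    _, _, path, params, query, fragment = urlparse(url)
    for scheme, netloc in MIRRORS:
        # Rebuild the URL on each mirror; only scheme and host change.
        candidate = urlunparse((scheme, netloc, path, params, query, fragment))
        print(candidate)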
Example #5
    def parse_downloads(self, series_url, search_title):
        page = requests.get(series_url).content
        try:
            soup = get_soup(page)
        except Exception as e:
            raise UrlRewritingError(e)

        urls = []
        # find all titles
        episode_titles = self.find_all_titles(search_title)
        if not episode_titles:
            raise UrlRewritingError('Unable to find episode')

        for ep_title in episode_titles:
            # find matching download
            episode_title = soup.find('strong',
                                      text=re.compile(ep_title, re.I))
            if not episode_title:
                continue

            # find download container
            episode = episode_title.parent
            if not episode:
                continue

            # find episode language
            episode_lang = episode.find_previous(
                'strong', text=re.compile('Sprache')).next_sibling
            if not episode_lang:
                log.warning('No language found for: %s' % series_url)
                continue

            # filter language
            if not self.check_language(episode_lang):
                log.warning('languages not matching: %s <> %s' %
                            (self.config['language'], episode_lang))
                continue

            # find download links
            links = episode.find_all('a')
            if not links:
                log.warning('No links found for: %s' % series_url)
                continue

            for link in links:
                if not link.has_attr('href'):
                    continue

                url = link['href']
                pattern = r'http:\/\/download\.serienjunkies\.org.*%s_.*\.html' % self.config['hoster']

                if re.match(pattern, url) or self.config['hoster'] == 'all':
                    urls.append(url)
                else:
                    continue
        return urls
Example #6
 def parse_download_page(self, url, requests):
     txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
     page = requests.get(url, headers=txheaders)
     try:
         soup = get_soup(page.text)
     except Exception as e:
         raise UrlRewritingError(e)
     down_link = soup.find('a', attrs={'href': re.compile(".+mp4")})
     if not down_link:
         raise UrlRewritingError('Unable to locate download link from url %s' % url)
     return down_link.get('href')
Example #7
    def parse_download(self, series_url, search_title, config, entry):
        page = requests.get(series_url).content
        try:
            soup = get_soup(page)
        except Exception as e:
            raise UrlRewritingError(e)

        config = config or {}
        config.setdefault('hoster', 'ul')
        config.setdefault('language', 'en')

        # find matching download
        episode_title = soup.find('strong', text=search_title)
        if not episode_title:
            raise UrlRewritingError('Unable to find episode')

        # find download container
        episode = episode_title.parent
        if not episode:
            raise UrlRewritingError('Unable to find episode container')

        # find episode language
        episode_lang = episode.find_previous(
            'strong', text=re.compile('Sprache')).next_sibling
        if not episode_lang:
            raise UrlRewritingError('Unable to find episode language')

        # filter language
        if config['language'] in ['de', 'both']:
            if not re.search(
                    'german|deutsch', episode_lang, flags=re.IGNORECASE):
                entry.reject('Language does not match')
        if config['language'] in ['en', 'both']:
            if not re.search('englisc?h', episode_lang, flags=re.IGNORECASE):
                entry.reject('Language does not match')

        # find download links
        links = episode.find_all('a')
        if not links:
            raise UrlRewritingError('Unable to find download links')

        for link in links:
            if not link.has_attr('href'):
                continue

            url = link['href']
            pattern = r'http:\/\/download\.serienjunkies\.org.*%s_.*\.html' % config['hoster']

            if re.match(pattern, url):
                return url
            else:
                log.debug('Hoster does not match')
                continue
Example #8
 def parse_download_page(self, url, requests):
     txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
     page = requests.get(url, headers=txheaders)
     try:
         soup = get_soup(page.text)
     except Exception as e:
         raise UrlRewritingError(e)
     tag_a = soup.find('a', attrs={'class': 'download_link'})
     if not tag_a:
         raise UrlRewritingError('Unable to locate download link from url %s' % url)
     torrent_url = 'http://www.bakabt.com' + tag_a.get('href')
     return torrent_url
Example #9
 def parse_download_page(self, url):
     page = requests.get(url)
     try:
         soup = get_soup(page.text)
     except Exception as e:
         raise UrlRewritingError(e)
     torrent_id_prog = re.compile(r"'(?:torrentID|id)'\s*:\s*'(\d+)'")
     torrent_ids = soup.findAll(text=torrent_id_prog)
     if len(torrent_ids) == 0:
         raise UrlRewritingError('Unable to locate torrent ID from url %s' % url)
     torrent_id = torrent_id_prog.search(torrent_ids[0]).group(1)
     return 'http://www.newpct.com/descargar/torrent/%s/dummy.html' % torrent_id
Example #10
 def url_from_page(self, url):
     """Parses torrent url from newtorrents download page"""
     try:
         page = urlopener(url, log)
         data = page.read()
     except urllib2.URLError:
         raise UrlRewritingError('URLerror when retrieving page')
     p = re.compile(r"copy\('(.*)'\)", re.IGNORECASE)
     f = p.search(data)
     if not f:
         # the link on which the plugin relies is missing!
         raise UrlRewritingError('Failed to get url from download page. Plugin may need an update.')
     else:
         return f.group(1)
Example #11
 def parse_download_page(self, page_url):
     page = urlopener(page_url, log)
     try:
         soup = get_soup(page)
     except Exception as e:
         raise UrlRewritingError(e)
     tag_a = soup.find("a", {"class": "dl_link"})
     if not tag_a:
         raise UrlRewritingError(
             'FTDB Unable to locate download link from url %s and tag_a is : %s'
             % (page_url, tag_a))
     torrent_url = "http://www3.frenchtorrentdb.com" + tag_a.get('href') + "&js=1"
     log.debug('TORRENT URL is : %s' % torrent_url)
     return torrent_url
Example #12
 def parse_download_page(self, url):
     page = requests.get(url, verify=False).content
     try:
         soup = get_soup(page)
         tag_div = soup.find('div', attrs={'class': 'download'})
         if not tag_div:
             raise UrlRewritingError('Unable to locate download link from url %s' % url)
         tag_a = tag_div.find('a')
         torrent_url = tag_a.get('href')
         # URL is sometimes missing the scheme
         if torrent_url.startswith('//'):
             torrent_url = 'http:' + torrent_url
         return torrent_url
     except Exception as e:
         raise UrlRewritingError(e)
Example #13
    def parse_download_page(self, url):
        page = urlopener(url, log)
        log.debug('%s opened', url)
        try:
            soup = get_soup(page)
            torrent_url = 'http://www.t411.me' + soup.find(
                text='Télécharger').findParent().get('href')
        except Exception as e:
            raise UrlRewritingError(e)

        if not torrent_url:
            raise UrlRewritingError(
                'Unable to locate download link from url %s' % url)

        return torrent_url
Example #14
class UrlRewriteNewPCT(object):
    """NewPCT urlrewriter."""

    # urlrewriter API
    def url_rewritable(self, task, entry):
        url = entry['url']
        if url.startswith('http://www.newpct.com/download/'):
            return False
        if url.startswith('http://www.newpct.com/') or url.startswith('http://newpct.com/'):
            return True
        return False

    # urlrewriter API
    def url_rewrite(self, task, entry):
        entry['url'] = self.parse_download_page(entry['url'])

    @internet(log)
    def parse_download_page(self, url):
        txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
        req = urllib2.Request(url, None, txheaders)
        page = urlopener(req, log)
        try:
            soup = get_soup(page)
        except Exception as e:
            raise UrlRewritingError(e)
        down_link = soup.find('a', attrs={'href': re.compile("descargar/torrent/")})
        if not down_link:
            raise UrlRewritingError('Unable to locate download link from url %s' % url)
        return down_link.get('href')
Example #15
 def parse_download_page(self, url):
     txheaders = {
         'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
     }
     req = urllib2.Request(url, None, txheaders)
     page = urlopener(req, log)
     try:
         soup = get_soup(page)
     except Exception as e:
         raise UrlRewritingError(e)
     down_link = soup.find('a', attrs={'href': re.compile(r"download/\d+/.*\.torrent")})
     if not down_link:
         raise UrlRewritingError(
             'Unable to locate download link from url %s' % url)
     return 'http://www.deadfrog.us/' + down_link.get('href')
Example #16
    def url_rewrite(self, task, entry):
        url = entry['url']
        if (url.startswith('http://www.newtorrents.info/?q=') or
           url.startswith('http://www.newtorrents.info/search')):
            try:
                url = self.entries_from_search(entry['title'], url=url)[0]['url']
            except PluginWarning as e:
                raise UrlRewritingError(e.value)
        else:
            url = self.url_from_page(url)

        if url:
            entry['url'] = url
            self.resolved.append(url)
        else:
            raise UrlRewritingError('Bug in newtorrents urlrewriter')
Example #17
    def url_rewrite(self, task, entry):
        log.debug('Requesting %s' % entry['url'])
        page = requests.get(entry['url'])
        soup = get_soup(page.text)

        for link in soup.findAll('a', attrs={'href': re.compile(r'^/url')}):
            # Extract correct url from google internal link
            href = 'http://google.com' + link['href']
            args = parse_qs(urlparse(href).query)
            href = args['q'][0]

            # Test if entry with this url would be recognized by some urlrewriter
            log.trace('Checking if %s is known by some rewriter' % href)
            fake_entry = {'title': entry['title'], 'url': href}
            urlrewriting = plugin.get_plugin_by_name('urlrewriting')
            if urlrewriting['instance'].url_rewritable(task, fake_entry):
                log.debug('--> rewriting %s (known url pattern)' % href)
                entry['url'] = href
                return
            else:
                log.debug('<-- ignoring %s (unknown url pattern)' % href)
        raise UrlRewritingError('Unable to resolve')
Example #18
    def url_rewrite(self, task, entry):
        url = entry['url']
        if (url.startswith('http://www.newtorrents.info/?q=')
                or url.startswith('http://www.newtorrents.info/search')):
            results = self.entries_from_search(entry['title'], url=url)
            if not results:
                raise UrlRewritingError("No matches for %s" % entry['title'])
            url = results[0]['url']
        else:
            url = self.url_from_page(url)

        if url:
            entry['url'] = url
            self.resolved.append(url)
        else:
            raise UrlRewritingError('Bug in newtorrents urlrewriter')
Example #19
 def parse_download_page(self, url):
     txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
     req = urllib2.Request(url, None, txheaders)
     page = urlopener(req, log)
     try:
         soup = get_soup(page)
     except Exception as e:
         raise UrlRewritingError(e)
Example #20
    def get_login_cookies(self, username, password):
        url_auth = 'http://www.t411.me/users/login'
        db_session = Session()
        account = db_session.query(torrent411Account).filter(
            torrent411Account.username == username).first()
        if account:
            if account.expiry_time < datetime.now():
                db_session.delete(account)
                db_session.commit()
            log.debug("Cookies found in db!")
            return account.auth
        else:
            log.debug("Getting login cookies from : %s " % url_auth)
            params = urllib.urlencode({
                'login': username,
                'password': password,
                'remember': '1'
            })
            cj = cookielib.CookieJar()
            # We need a cookie hook here to avoid redirect cookies
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            # Must use the same User-Agent as the download link request
            opener.addheaders = [('User-agent', self.USER_AGENT)]
            try:
                opener.open(url_auth, params)
            except Exception as e:
                raise UrlRewritingError("Connection Error for %s : %s" %
                                        (url_auth, e))

            authKey = None
            uid = None
            password = None

            for cookie in cj:
                if cookie.name == "authKey":
                    authKey = cookie.value
                if cookie.name == "uid":
                    uid = cookie.value
                if cookie.name == "pass":
                    password = cookie.value

            if authKey is not None and \
               uid is not None and \
               password is not None:
                authCookie = {
                    'uid': uid,
                    'password': password,
                    'authKey': authKey
                }
                db_session.add(
                    torrent411Account(username=username,
                                      auth=authCookie,
                                      expiry_time=datetime.now() +
                                      timedelta(days=1)))
                db_session.commit()
                return authCookie

        return {"uid": "", "password": "", "authKey": ""}
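Example #20 logs in with urllib2 plus a cookielib.CookieJar and then picks the uid, pass and authKey cookies out of the jar by hand. A hedged sketch of the same cookie capture with requests.Session, which keeps cookies across the login request (the form fields and cookie names come from the example; the session-based rewrite itself is only an illustration, not the plugin's code):

    import requests

    def get_t411_auth_cookie(username, password):
        # One session so the cookies set by the login response are retained.
        session = requests.Session()
        session.post('http://www.t411.me/users/login',
                     data={'login': username, 'password': password, 'remember': '1'})
        cookies = session.cookies.get_dict()
        # Same three cookies the example extracts from the CookieJar.
        if all(name in cookies for name in ('uid', 'pass', 'authKey')):
            return {'uid': cookies['uid'],
                    'password': cookies['pass'],
                    'authKey': cookies['authKey']}
        return {'uid': '', 'password': '', 'authKey': ''}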
Example #21
 def parse_download_page(self, page_url, requests):
     page = requests.get(page_url)
     try:
         soup = get_soup(page.text)
     except Exception as e:
         raise UrlRewritingError(e)
     tag_a = soup.find("a", {"class": "dl_link"})
     if not tag_a:
         if soup.findAll(text="Connexion ?"):
             raise UrlRewritingError('You are not logged in, check if your '
                                     'cookie for authentication is up to date')
         else:
             raise UrlRewritingError('You have reached your download limit '
                                     'per 24 hours, so I cannot get the torrent')
     torrent_url = ("http://www.frenchtorrentdb.com" + tag_a.get('href') + "&js=1")
     log.debug('TORRENT URL is : %s' % torrent_url)
     return torrent_url
Example #22
 def parse_download_page(self, url):
     try:
         page = requests.get(url).content
         soup = get_soup(page, 'html.parser')
         download_link = soup.findAll(
             href=re.compile('redirect|redirectlink'))
         download_href = download_link[0]['href']
         return download_href
     except Exception:
         raise UrlRewritingError('Unable to locate torrent from url %s' %
                                 url)
Example #23
 def url_rewrite(self, task, entry):
     try:
         # need to fake user agent
         txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
         page = task.requests.get(entry['url'], headers=txheaders)
         soup = get_soup(page.text)
         results = soup.find_all('a', attrs={'class': 'l'})
         if not results:
             raise UrlRewritingError('No results')
         for res in results:
             url = res.get('href')
             url = url.replace('/interstitial?url=', '')
             # generate match regexp from google search result title
             regexp = '.*'.join([x.contents[0] for x in res.find_all('em')])
             if re.match(regexp, entry['title']):
                 log.debug('resolved, found with %s' % regexp)
                 entry['url'] = url
                 return
         raise UrlRewritingError('Unable to resolve')
     except Exception as e:
         raise UrlRewritingError(e)
Example #24
 def url_rewrite(self, task, entry):
     if 'url' not in entry:
         log.error("Didn't actually get a URL...")
     else:
         log.debug("Got the URL: %s" % entry['url'])
     if entry['url'].startswith(BASE_URL + '/t?'):
         # use search
         results = self.search(task, entry)
         if not results:
             raise UrlRewritingError("No search results found")
         # TODO: Search doesn't enforce close match to title, be more picky
         entry['url'] = results[0]['url']
Example #25
 def url_rewrite(self, task, entry):
     if 'url' not in entry:
         log.error("Didn't actually get a URL...")
     else:
         log.debug("Got the URL: %s" % entry['url'])
     if entry['url'].startswith('http://torrentleech.org/torrents/browse/index/query/'):
         # use search
         results = self.search(task, entry)
         if not results:
             raise UrlRewritingError("No search results found")
         # TODO: Search doesn't enforce close match to title, be more picky
         entry['url'] = results[0]['url']
Example #26
 def url_rewrite(self, task, entry):
     if 'url' not in entry:
         log.error("Didn't actually get a URL...")
     else:
         log.debug("Got the URL: %s" % entry['url'])
     if entry['url'].startswith(
             'http://torrentleech.org/torrents/browse/index/query/'):
         # use search
         try:
             entry['url'] = self.search(entry)[0]['url']
         except PluginWarning as e:
             raise UrlRewritingError(e)
     else:
         entry['url'] = entry['url']
Example #27
 def url_rewrite(self, task, entry):
     if 'url' not in entry:
         log.error("Didn't actually get a URL...")
     else:
         log.debug("Got the URL: %s" % entry['url'])
     if URL_SEARCH.match(entry['url']):
         # use search
         results = self.search(task, entry)
         if not results:
             raise UrlRewritingError("No search results found")
         # TODO: Close matching was taken out of search methods, this may need to be fixed to be more picky
         entry['url'] = results[0]['url']
     else:
         # parse download page
         entry['url'] = self.parse_download_page(entry['url'])
Example #28
 def url_rewrite(self, task, entry):
     if 'url' not in entry:
         log.error("Didn't actually get a URL...")
     else:
         log.debug("Got the URL: %s" % entry['url'])
     if entry['url'].startswith(('http://thepiratebay.se/search/',
                                 'http://thepiratebay.org/search/')):
         # use search
         try:
             entry['url'] = self.search(entry['title'])[0]['url']
         except PluginWarning as e:
             raise UrlRewritingError(e)
     else:
         # parse download page
         entry['url'] = self.parse_download_page(entry['url'])
Example #29
    def url_rewrite(self, task, entry):
        for name, config in self.resolves.get(task.name, {}).items():
            regexp = config['regexp_compiled']
            format = config['format']
            if regexp.search(entry['url']):
                log.debug('Regexp resolving %s with %s' % (entry['url'], name))

                # run the regexp
                entry['url'] = regexp.sub(format, entry['url'])

                if regexp.match(entry['url']):
                    entry.fail('urlrewriting')
                    raise UrlRewritingError(
                        'Regexp %s result should NOT continue to match!' %
                        name)
                return
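Example #29 applies a user-configured substitution to the URL and then refuses a result that still matches the original pattern, which guards against rewrite rules that would loop forever. A standalone sketch of that check with a hypothetical pattern/format pair (both invented for illustration):

    import re

    regexp = re.compile(r'example\.com/details/(\d+)')   # hypothetical pattern
    fmt = r'example.com/download/\1.torrent'             # hypothetical format

    url = 'http://example.com/details/12345'
    if regexp.search(url):
        rewritten = regexp.sub(fmt, url)
        # A result that still matches would just be rewritten again on the next pass.
        if regexp.search(rewritten):
            raise ValueError('Regexp result should NOT continue to match!')
        print(rewritten)  # http://example.com/download/12345.torrent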
Example #30
    def get_login_cookies(self, username, password):
        url_auth = 'http://www.t411.in/users/login'
        db_session = Session()
        account = db_session.query(torrent411Account).filter(
            torrent411Account.username == username).first()
        if account:
            if account.expiry_time < datetime.now():
                db_session.delete(account)
                db_session.commit()
            log.debug("Cookies found in db!")
            return account.auth
        else:
            log.debug("Getting login cookies from : %s " % url_auth)
            params = {'login': username, 'password': password, 'remember': '1'}
            cj = cookielib.CookieJar()
            # We need a cookie hook here to avoid redirect cookies
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            # Must use the same User-Agent as the download link request
            opener.addheaders = [('User-agent', self.USER_AGENT)]
            login_output = None
            try:
                login_output = opener.open(url_auth,
                                           urllib.urlencode(params)).read()
            except Exception as e:
                raise UrlRewritingError("Connection Error for %s : %s" %
                                        (url_auth, e))

            if b'confirmer le captcha' in login_output:
                log.warn("Captcha requested for login.")
                login_output = self._solveCaptcha(login_output, url_auth,
                                                  params, opener)

            if b'logout' in login_output:
                authKey = None
                uid = None
                password = None

                for cookie in cj:
                    if cookie.name == "authKey":
                        authKey = cookie.value
                    if cookie.name == "uid":
                        uid = cookie.value
                    if cookie.name == "pass":
                        password = cookie.value

                if authKey is not None and \
                   uid is not None and \
                   password is not None:
                    authCookie = {
                        'uid': uid,
                        'password': password,
                        'authKey': authKey
                    }
                    db_session.add(
                        torrent411Account(username=username,
                                          auth=authCookie,
                                          expiry_time=datetime.now() +
                                          timedelta(days=1)))
                    db_session.commit()
                    return authCookie
            else:
                log.error(
                    "Login failed (Torrent411). Check your login and password."
                )
                return {}