Example No. 1
    def search(self, task, entry, config=None):
        """
        Search for name from iptorrents
        """

        categories = config.get('category', 'All')
        # Make sure categories is a list
        if not isinstance(categories, list):
            categories = [categories]

        # If there are any text categories, turn them into their id number
        categories = [c if isinstance(c, int) else CATEGORIES[c]
                      for c in categories]
        filter_url = '&'.join((str(c) + '=') for c in categories)

        entries = set()

        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string)
            query = quote_plus(query.encode('utf8'))

            url = "{base_url}/t?{filter}&q={query}&qf=".format(base_url=BASE_URL, filter=filter_url, query=query)
            log.debug('searching with url: %s' % url)
            req = requests.get(url, cookies={'uid': str(config['uid']), 'pass': config['password']})

            if '/u/' + str(config['uid']) not in req.text:
                raise plugin.PluginError("Invalid cookies (user not logged in)...")

            soup = get_soup(req.content, parser="html.parser")
            torrents = soup.find('table', {'id': 'torrents'})

            results = torrents.findAll('tr')
            for torrent in results:
                if torrent.th and 'ac' in torrent.th.get('class'):
                    # Header column
                    continue
                if torrent.find('td', {'colspan': '99'}):
                    log.debug('No results found for search %s', search_string)
                    break
                entry = Entry()
                link = torrent.find('a', href=re.compile('download'))['href']
                entry['url'] = "{base}{link}?torrent_pass={key}".format(
                    base=BASE_URL, link=link, key=config.get('rss_key'))
                entry['title'] = torrent.find('a', href=re.compile('details')).text

                seeders = torrent.findNext('td', {'class': 'ac t_seeders'}).text
                leechers = torrent.findNext('td', {'class': 'ac t_leechers'}).text
                entry['torrent_seeds'] = int(seeders)
                entry['torrent_leeches'] = int(leechers)
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                            entry['torrent_leeches'])

                size = torrent.findNext(text=re.compile('^([\.\d]+) ([GMK]?)B$'))
                size = re.search('^([\.\d]+) ([GMK]?)B$', size)

                entry['content_size'] = parse_filesize(size.group(0))
                log.debug('Found entry %s', entry)
                entries.add(entry)

        return entries
Example No. 2
    def url_rewrite(self, task, entry):
        soup = self._get_soup(task, entry['url'])

        # grab links from the main post:
        link_elements = []
        log.debug('Searching %s for a tags where the text matches one of: %s',
                  entry['url'], str(self.config.get('link_text_re')))
        for regexp in self.config.get('link_text_re'):
            link_elements.extend(soup.find_all('a', string=re.compile(regexp)))
        log.debug('Original urls: %s', str(entry['urls']))
        if entry['urls']:
            urls = list(entry['urls'])
        else:
            urls = []
        log.debug('Found link elements: %s', str(link_elements))
        for element in link_elements:
            if re.search('nfo1.rlsbb.(ru|com)', element['href']):
                # grab multipart links
                urls.extend(self.grab_multilinks(task, element['href']))
            else:
                urls.append(element['href'])

        # grab links from comments
        regexps = self.config.get('filehosters_re', [])
        if self.config.get('parse_comments'):
            comments = soup.find_all('div', id=re.compile("commentbody"))
            log.debug('Comment parsing enabled: found %d comments.', len(comments))
            if comments and not regexps:
                log.warn('You have enabled comment parsing but you did not define any filehoster_re filter. You may get a lot of unwanted and potentially dangerous links from the comments.')
            for comment in comments:
                links = comment.find_all('a')
                for link in links:
                    urls.append(link['href'])

        # filter urls:
        filtered_urls = []
        for i, url in enumerate(urls):
            urls[i] = normalize_unicode(url)
            for regexp in regexps:
                if re.search(regexp, urls[i]):
                    filtered_urls.append(urls[i])
                    log.debug('Url: "%s" matched filehoster filter: %s', urls[i], regexp)
                    break
            else:
                if regexps:
                    log.debug(
                        'Url: "%s" was discarded because it does not match any of the given filehoster filters: %s', urls[i], str(regexps))
        if regexps:
            log.debug('Using filehosters_re filters: %s', str(regexps))
            urls = filtered_urls
        else:
            log.debug('No filehoster filters configured, using all found links.')
        num_links = len(urls)
        log.debug('Original urls: %s', str(entry['urls']))
        log.verbose('Found %d links at %s.', num_links, entry['url'])
        if num_links:
            entry['urls'] = urls
            entry['url'] = urls[0]
        else:
            raise UrlRewritingError('No useable links found at %s' % entry['url'])
Example No. 3
    def url_rewrite(self, task, entry):
        soup = self._get_soup(task, entry['url'])

        link_re = re.compile('rarefile\.net.*\.rar$')
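        # Only links pointing to .rar files hosted on rarefile.net are considered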

        # grab links from the main entry:
        blog_entry = soup.find('div', class_="entry")
        num_links = 0
        link_list = None
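        # Keep the links from whichever paragraph contains the most matching links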
        for paragraph in blog_entry.find_all('p'):
            links = paragraph.find_all('a', href=link_re)
            if len(links) > num_links:
                link_list = links
                num_links = len(links)
        if 'urls' in entry:
            urls = list(entry['urls'])
        else:
            urls = []
        if link_list is not None:
            for link in link_list:
                urls.append(normalize_unicode(link['href']))
        else:
            raise UrlRewritingError('No useable links found at %s' %
                                    entry['url'])

        num_links = len(urls)
        log.verbose('Found %d links at %s.', num_links, entry['url'])
        if num_links:
            entry['urls'] = urls
            entry['url'] = urls[0]
        else:
            raise UrlRewritingError('No useable links found at %s' %
                                    entry['url'])
Example No. 4
    def search(self, task, entry, config=None):
        from flexget.utils.template import environment
        from flexget.manager import manager

        search_strings = [
            urllib.quote(normalize_unicode(s).encode("utf-8")) for s in entry.get("search_strings", [entry["title"]])
        ]
        rss_plugin = plugin.get_plugin_by_name("rss")
        entries = set()
        rss_config = rss_plugin.instance.build_config(config)
        try:
            template = environment.from_string(rss_config["url"])
        except TemplateSyntaxError as e:
            raise plugin.PluginError("Invalid jinja template as rss url: %s" % e)
        rss_config["all_entries"] = True
        for search_string in search_strings:
            rss_config["url"] = template.render({"search_term": search_string})
            # TODO: capture some other_fields to try to find seed/peer/content_size numbers?
            try:
                results = rss_plugin.phase_handlers["input"](task, rss_config)
            except plugin.PluginError as e:
                log.error("Error attempting to get rss for %s: %s", rss_config["url"], e)
            else:
                entries.update(results)
        return entries
Example No. 5
 def search(self, task, entry, config=None):
     from flexget.utils.template import environment
     search_strings = [
         quote(normalize_unicode(s).encode('utf-8'))
         for s in entry.get('search_strings', [entry['title']])
     ]
     rss_plugin = plugin.get_plugin_by_name('rss')
     entries = set()
     rss_config = rss_plugin.instance.build_config(config)
     try:
         template = environment.from_string(rss_config['url'])
     except TemplateSyntaxError as e:
         raise plugin.PluginError('Invalid jinja template as rss url: %s' %
                                  e)
     rss_config['all_entries'] = True
     for search_string in search_strings:
         rss_config['url'] = template.render({'search_term': search_string})
         # TODO: capture some other_fields to try to find seed/peer/content_size numbers?
         try:
             results = rss_plugin.phase_handlers['input'](task, rss_config)
         except plugin.PluginError as e:
             log.error('Error attempting to get rss for %s: %s',
                       rss_config['url'], e)
         else:
             entries.update(results)
     return entries
Example No. 6
    def url_rewrite(self, task, entry):
        soup = self._get_soup(task, entry['url'])

        link_re = re.compile('rarefile\.net.*\.rar$')

        # grab links from the main entry:
        blog_entry = soup.find('div', class_="entry")
        num_links = 0
        link_list = None
        for paragraph in blog_entry.find_all('p'):
            links = paragraph.find_all('a', href=link_re)
            if len(links) > num_links:
                link_list = links
                num_links = len(links)
        if 'urls' in entry:
            urls = list(entry['urls'])
        else:
            urls = []
        if link_list is not None:
            for link in link_list:
                urls.append(normalize_unicode(link['href']))
        else:
            raise UrlRewritingError('No useable links found at %s' % entry['url'])

        num_links = len(urls)
        log.verbose('Found %d links at %s.', num_links, entry['url'])
        if num_links:
            entry['urls'] = urls
            entry['url'] = urls[0]
        else:
            raise UrlRewritingError('No useable links found at %s' % entry['url'])
Example No. 7
    def search(self, task, entry, config=None):
        """
            Search for entries on SceneAccess
        """

        if not session.cookies:
            log.debug('Logging in to %s...' % URL)
            params = {'username': config['username'],
                      'password': config['password'],
                      'submit': 'come on in'}
            session.post(URL + 'login', data=params)

        if 'gravity_multiplier' in config:
            multip = config['gravity_multiplier']
        else:
            multip = 1

        # Prepare queries...
        BASE_URLS = list()
        entries = set()
        for category in self.processCategories(config):
            BASE_URLS.append(URL + '%(url_path)s?method=2%(category_url_string)s' % category)

        # Search...
        for search_string in entry.get('search_strings', [entry['title']]):
            search_string_normalized = normalize_unicode(clean_title(search_string))
            search_string_url_fragment = '&search=' + quote(search_string_normalized.encode('utf8'))

            for url in BASE_URLS:
                url += search_string_url_fragment
                log.debug('Search URL for `%s`: %s' % (search_string, url))

                page = session.get(url).content
                soup = get_soup(page)

                for result in soup.findAll('tr', attrs={'class': 'tt_row'}):
                    entry = Entry()
                    entry['title'] = result.find('a', href=re.compile(r'details\?id=\d+'))['title']
                    entry['url'] = URL + result.find('a', href=re.compile(r'.torrent$'))['href']

                    entry['torrent_seeds'] = result.find('td', attrs={'class': 'ttr_seeders'}).text
                    entry['torrent_leeches'] = result.find('td', attrs={'class': 'ttr_leechers'}).text
                    entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])*multip

                    size = result.find('td', attrs={'class': 'ttr_size'}).next
                    size = re.search('(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)

                    if size:
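                        # Convert the site's decimal KB/MB/GB figure to bytes, then to mebibytes for content_size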
                        if size.group(2) == 'GB':
                            entry['content_size'] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
                        elif size.group(2) == 'MB':
                            entry['content_size'] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
                        elif size.group(2) == 'KB':
                            entry['content_size'] = int(float(size.group(1)) * 1000 / 1024 ** 2)
                        else:
                            entry['content_size'] = int(float(size.group(1)) / 1024 ** 2)

                    entries.add(entry)

        return entries
Example No. 8
    def search(self, entry, config):
        # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
        name = normalize_unicode(entry["title"])
        optionlist = [
            "misc",
            "movies",
            "audio",
            "tv",
            "games",
            "apps",
            "pics",
            "anime",
            "comics",
            "books",
            "music video",
            "unclassified",
            "all",
        ]
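        # The iht parameter is the chosen category's position in optionlist (config holds the category name)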
        url = "http://isohunt.com/js/rss/%s?iht=%s&noSL" % (
            urllib.quote(name.encode("utf-8")),
            optionlist.index(config),
        )

        log.debug("requesting: %s" % url)
        rss = feedparser.parse(url)
        entries = []

        status = rss.get("status", False)
        if status != 200:
            raise PluginWarning("Search result not 200 (OK), received %s" % status)

        ex = rss.get("bozo_exception", False)
        if ex:
            raise PluginWarning("Got bozo_exception (bad feed)")

        for item in rss.entries:
            entry = Entry()
            entry["title"] = item.title
            entry["url"] = item.link

            m = re.search(r"Size: ([\d]+).*Seeds: (\d+).*Leechers: (\d+)", item.description, re.IGNORECASE)
            if not m:
                log.debug("regexp did not find seeds / peer data")
                continue
            else:
                log.debug("regexp found size(%s), Seeds(%s) and Leeches(%s)" % (m.group(1), m.group(2), m.group(3)))

                entry["content_size"] = int(m.group(1))
                entry["torrent_seeds"] = int(m.group(2))
                entry["torrent_leeches"] = int(m.group(3))
                entry["search_sort"] = torrent_availability(entry["torrent_seeds"], entry["torrent_leeches"])

            entries.append(entry)
        # choose torrent
        if not entries:
            raise PluginWarning("No close matches for %s" % name, log, log_once=True)

        entries.sort(reverse=True, key=lambda x: x.get("search_sort"))

        return entries
Example No. 9
    def search(self, task, entry, config=None):
        """
        Search for name from iptorrents
        """

        categories = config.get('category', 'All')
        # Make sure categories is a list
        if not isinstance(categories, list):
            categories = [categories]

        # If there are any text categories, turn them into their id number
        categories = [c if isinstance(c, int) else CATEGORIES[c]
                      for c in categories]
        filter_url = '&'.join((str(c) + '=') for c in categories)

        entries = set()

        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string)
            query = quote_plus(query.encode('utf8'))

            url = "{base_url}/t?{filter}&q={query}&qf=".format(base_url=BASE_URL, filter=filter_url, query=query)
            log.debug('searching with url: %s' % url)
            req = requests.get(url, cookies={'uid': str(config['uid']), 'pass': config['password']})

            if '/u/' + str(config['uid']) not in req.text:
                raise plugin.PluginError("Invalid cookies (user not logged in)...")

            soup = get_soup(req.content, parser="html.parser")
            torrents = soup.find('table', {'id': 'torrents'})

            results = torrents.findAll('tr')
            for torrent in results:
                if torrent.th and 'ac' in torrent.th.get('class'):
                    # Header column
                    continue
                if torrent.find('td', {'colspan': '99'}):
                    log.debug('No results found for search %s', search_string)
                    return
                entry = Entry()
                link = torrent.find('a', href=re.compile('download'))['href']
                entry['url'] = "{base}{link}?torrent_pass={key}".format(
                    base=BASE_URL, link=link, key=config.get('rss_key'))
                entry['title'] = torrent.find('a', href=re.compile('details')).text

                seeders = torrent.findNext('td', {'class': 'ac t_seeders'}).text
                leechers = torrent.findNext('td', {'class': 'ac t_leechers'}).text
                entry['torrent_seeds'] = int(seeders)
                entry['torrent_leeches'] = int(leechers)
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                            entry['torrent_leeches'])

                size = torrent.findNext(text=re.compile('^([\.\d]+) ([GMK]?)B$'))
                size = re.search('^([\.\d]+) ([GMK]?)B$', size)

                entry['content_size'] = parse_filesize(size.group(0))
                log.debug('Found entry %s', entry)
                entries.add(entry)

        return entries
Example No. 10
 def search(self, entry, config=None):
     from flexget.utils.template import environment
     from flexget.manager import manager
     search_strings = [
         urllib.quote(normalize_unicode(s).encode('utf-8'))
         for s in entry.get('search_strings', [entry['title']])
     ]
     rss_plugin = plugin.get_plugin_by_name('rss')
     entries = set()
     rss_config = rss_plugin.instance.build_config(config)
     template = environment.from_string(rss_config['url'])
     rss_config['all_entries'] = True
     for search_string in search_strings:
         # Create a fake task to pass to the rss plugin input handler
         task = Task(manager, 'search_rss_task', config={})
         rss_config['url'] = template.render({'search_term': search_string})
         # TODO: capture some other_fields to try to find seed/peer/content_size numbers?
         try:
             results = rss_plugin.phase_handlers['input'](task, rss_config)
         except plugin.PluginError as e:
             log.error('Error attempting to get rss for %s: %s',
                       rss_config['url'], e)
         else:
             entries.update(results)
     return entries
Example No. 11
    def search(self, task, entry, config=None):
        """
        Search for name from torrentleech.
        """
        request_headers = {'User-Agent': 'curl/7.54.0'}
        rss_key = config['rss_key']

        # build the form request:
        data = {'username': config['username'], 'password': config['password']}
        # POST the login form:
        try:
            login = task.requests.post('https://www.torrentleech.org/user/account/login/', data=data,
                                       headers=request_headers, allow_redirects=True)
        except RequestException as e:
            raise PluginError('Could not connect to torrentleech: %s' % str(e))

        if not isinstance(config, dict):
            config = {}
            # sort = SORT.get(config.get('sort_by', 'seeds'))
            # if config.get('sort_reverse'):
            # sort += 1
        categories = config.get('category', 'all')
        # Make sure categories is a list
        if not isinstance(categories, list):
            categories = [categories]
        # If there are any text categories, turn them into their id number
        categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
        filter_url = '/categories/{}'.format(','.join(str(c) for c in categories))
        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string).replace(":", "")
            # urllib.quote will crash if the unicode string has non ascii characters,
            # so encode in utf-8 beforehand

            url = ('https://www.torrentleech.org/torrents/browse/list/query/' +
                   quote(query.encode('utf-8')) + filter_url)
            log.debug('Using %s as torrentleech search url', url)

            results = task.requests.get(url, headers=request_headers, cookies=login.cookies).json()

            for torrent in results['torrentList']:
                entry = Entry()
                entry['download_headers'] = request_headers
                entry['title'] = torrent['name']

                # construct download URL
                torrent_url = 'https://www.torrentleech.org/rss/download/{}/{}/{}'.format(torrent['fid'], rss_key,
                                                                                          torrent['filename'])
                log.debug('RSS-ified download link: %s', torrent_url)
                entry['url'] = torrent_url

                # seeders/leechers
                entry['torrent_seeds'] = torrent['seeders']
                entry['torrent_leeches'] = torrent['leechers']
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                entry['content_size'] = parse_filesize(str(torrent['size']) + ' b')
                entries.add(entry)

        return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
Example No. 12
    def url_rewrite(self, task, entry):
        soup = self._get_soup(task, entry['url'])

        # grab link from filehosters_re
        link_elements = []
        log.debug('Searching %s for a tags where the text matches one of: %s',
                  entry['url'], str(self.config.get('filehosters_re')))
        regexps = self.config.get('filehosters_re', [])
        if self.config.get('parse'):
            link_elements = soup.find_all('div',
                                          class_=re.compile("mag_details"))
            log.debug(
                'filehosters_re parsing enabled: found %d filehosters_re.',
                len(link_elements))
        log.debug('Original urls: %s', str(entry['urls']))
        if 'urls' in entry:
            urls = list(entry['urls'])
            log.debug('Original urls: %s', str(entry['urls']))
        else:
            urls = []
        log.debug('link_elements parsing enabled: found %d link_elements.',
                  len(link_elements))
        if link_elements and not regexps:
            log.warn('Link elements were found but no filehosters_re filters are defined; all found links will be used.')
        for target in link_elements:
            links = target.find_all('a')
            for link in links:
                if re.search('novafile.com', link['href']):
                    urls.append(link['href'])

        # filter urls:
        filtered_urls = []
        for i, url in enumerate(urls):
            urls[i] = normalize_unicode(url)
            for regexp in regexps:
                if re.search(regexp, urls[i]):
                    filtered_urls.append(urls[i])
                    log.debug('Url: "%s" matched filehoster filter: %s',
                              urls[i], regexp)
                    break
            else:
                if regexps:
                    log.debug(
                        'Url: "%s" was discarded because it does not match any of the given filehoster filters: %s',
                        urls[i], str(regexps))
        if regexps:
            log.debug('Using filehosters_re filters: %s', str(regexps))
            urls = filtered_urls
        else:
            log.debug(
                'No filehoster filters configured, using all found links.')
        num_links = len(urls)
        log.verbose('Found %d links at %s.', num_links, entry['url'])
        if num_links:
            entry['urls'] = urls
            entry['url'] = urls[0]
        else:
            raise UrlRewritingError('No useable links found at %s' %
                                    entry['url'])
Example No. 13
    def search(self, task, entry, config=None):
        config = self.process_config(config)
        feed = REPUTATIONS[config['reputation']]
        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string +
                                      config.get('extra_terms', ''))
            for domain in ['eu', 'me', 'ch', 'in']:
                # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
                url = 'http://torrentz.%s/%s?q=%s' % (
                    domain, feed, quote(query.encode('utf-8')))
                log.debug('requesting: %s' % url)
                try:
                    r = task.requests.get(url)
                    break
                except requests.ConnectionError as err:
                    # The different domains all resolve to the same ip, so only try more if it was a dns error
                    log.warning('torrentz.%s connection failed. Error: %s' %
                                (domain, err))
                    continue
                except requests.RequestException as err:
                    raise plugin.PluginError(
                        'Error getting torrentz search results: %s' % err)

            else:
                raise plugin.PluginError(
                    'Error getting torrentz search results')

            if not r.content.strip():
                raise plugin.PluginError(
                    'No data from %s. Maybe torrentz is blocking the FlexGet User-Agent'
                    % url)

            rss = feedparser.parse(r.content)

            if rss.get('bozo_exception'):
                raise plugin.PluginError('Got bozo_exception (bad rss feed)')

            for item in rss.entries:
                m = re.search(
                    r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                    item.description, re.IGNORECASE)
                if not m:
                    log.debug('regexp did not find seeds / peer data')
                    continue

                entry = Entry()
                entry['title'] = item.title
                entry['url'] = item.link
                entry['content_size'] = int(m.group(1))
                entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
                entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
                entry['torrent_info_hash'] = m.group(4).upper()
                entry['search_sort'] = torrent_availability(
                    entry['torrent_seeds'], entry['torrent_leeches'])
                entries.add(entry)

        log.debug('Search got %d results' % len(entries))
        return entries
Example No. 14
    def search(self, task, entry, config=None):
        """
        Search for name from piratebay.
        """
        if not isinstance(config, dict):
            config = {}
        sort = SORT.get(config.get('sort_by', 'seeds'))
        if config.get('sort_reverse'):
            sort += 1
        if isinstance(config.get('category'), int):
            category = config['category']
        else:
            category = CATEGORIES.get(config.get('category', 'all'))
        filter_url = '/0/%d/%d' % (sort, category)

        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string)
            # TPB search doesn't like dashes
            query = query.replace('-', ' ')
            # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
            url = 'http://thepiratebay.%s/search/%s%s' % (
                CUR_TLD, quote(query.encode('utf-8')), filter_url)
            log.debug('Using %s as piratebay search url' % url)
            page = requests.get(url).content
            soup = get_soup(page)
            for link in soup.find_all('a', attrs={'class': 'detLink'}):
                entry = Entry()
                entry['title'] = self.extract_title(link)
                if not entry['title']:
                    log.error(
                        'Malformed search result. No title or url found. Skipping.'
                    )
                    continue
                entry['url'] = 'http://thepiratebay.%s%s' % (CUR_TLD,
                                                             link.get('href'))
                tds = link.parent.parent.parent.find_all('td')
                entry['torrent_seeds'] = int(tds[-2].contents[0])
                entry['torrent_leeches'] = int(tds[-1].contents[0])
                entry['search_sort'] = torrent_availability(
                    entry['torrent_seeds'], entry['torrent_leeches'])
                # Parse content_size
                size = link.find_next(attrs={'class': 'detDesc'}).contents[0]
                size = re.search('Size ([\.\d]+)\xa0([GMK])iB', size)
                if size:
                    if size.group(2) == 'G':
                        entry['content_size'] = int(
                            float(size.group(1)) * 1000**3 / 1024**2)
                    elif size.group(2) == 'M':
                        entry['content_size'] = int(
                            float(size.group(1)) * 1000**2 / 1024**2)
                    else:
                        entry['content_size'] = int(
                            float(size.group(1)) * 1000 / 1024**2)
                entries.add(entry)

        return sorted(entries,
                      reverse=True,
                      key=lambda x: x.get('search_sort'))
Example No. 15
    def search(self, task, entry, config=None):
        """
        Search for name from torrentleech.
        """
        request_headers = {'User-Agent': 'curl/7.54.0'}
        rss_key = config['rss_key']

        # build the form request:
        data = {'username': config['username'], 'password': config['password']}
        # POST the login form:
        try:
            login = task.requests.post('https://www.torrentleech.org/user/account/login/', data=data,
                                       headers=request_headers, allow_redirects=True)
        except RequestException as e:
            raise PluginError('Could not connect to torrentleech: %s' % str(e))

        if not isinstance(config, dict):
            config = {}
            # sort = SORT.get(config.get('sort_by', 'seeds'))
            # if config.get('sort_reverse'):
            # sort += 1
        categories = config.get('category', 'all')
        # Make sure categories is a list
        if not isinstance(categories, list):
            categories = [categories]
        # If there are any text categories, turn them into their id number
        categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
        filter_url = '/categories/{}'.format(','.join(str(c) for c in categories))
        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string).replace(":", "")
            # urllib.quote will crash if the unicode string has non ascii characters,
            # so encode in utf-8 beforehand

            url = ('https://www.torrentleech.org/torrents/browse/list/query/' +
                   quote(query.encode('utf-8')) + filter_url)
            log.debug('Using %s as torrentleech search url', url)

            results = task.requests.get(url, headers=request_headers, cookies=login.cookies).json()

            for torrent in results['torrentList']:
                entry = Entry()
                entry['download_headers'] = request_headers
                entry['title'] = torrent['name']

                # construct download URL
                torrent_url = 'https://www.torrentleech.org/rss/download/{}/{}/{}'.format(torrent['fid'], rss_key,
                                                                                          torrent['filename'])
                log.debug('RSS-ified download link: %s', torrent_url)
                entry['url'] = torrent_url

                # seeders/leechers
                entry['torrent_seeds'] = torrent['seeders']
                entry['torrent_leeches'] = torrent['leechers']
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                entry['content_size'] = parse_filesize(str(torrent['size']) + ' b')
                entries.add(entry)

        return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
Example No. 16
    def search(self, task, entry, config=None):
        """
            Search for entries on SceneAccess
        """

        session = task.requests

        if 'sceneaccess.eu' not in session.domain_limiters:
            session.add_domain_limiter(TimedLimiter('sceneaccess.eu', '7 seconds'))

        if not session.cookies:
            log.debug('Logging in to %s...' % URL)
            params = {'username': config['username'],
                      'password': config['password'],
                      'submit': 'come on in'}
            session.post(URL + 'login', data=params)

        if 'gravity_multiplier' in config:
            multip = config['gravity_multiplier']
        else:
            multip = 1

        # Prepare queries...
        base_urls = list()
        entries = set()
        for category in self.process_categories(config):
            base_urls.append(URL + '%(url_path)s?method=2%(category_url_string)s' % category)

        # Search...
        for search_string in entry.get('search_strings', [entry['title']]):
            search_string_normalized = normalize_unicode(clean_title(search_string))
            search_string_url_fragment = '&search=' + quote(search_string_normalized.encode('utf8'))

            for url in base_urls:
                url += search_string_url_fragment
                log.debug('Search URL for `%s`: %s' % (search_string, url))

                page = session.get(url).content
                soup = get_soup(page)

                for result in soup.findAll('tr', attrs={'class': 'tt_row'}):
                    entry = Entry()
                    entry['title'] = result.find('a', href=re.compile(r'details\?id=\d+'))['title']
                    entry['url'] = URL + result.find('a', href=re.compile(r'.torrent$'))['href']

                    entry['torrent_seeds'] = result.find('td', attrs={'class': 'ttr_seeders'}).text
                    entry['torrent_leeches'] = result.find('td', attrs={'class': 'ttr_leechers'}).text
                    entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                                entry['torrent_leeches']) * multip

                    size = result.find('td', attrs={'class': 'ttr_size'}).text
                    size = re.search('(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)

                    entry['content_size'] = parse_filesize(size.group(0))

                    entries.add(entry)

        return entries
Example No. 17
    def search(self, task, entry, config=None):
        """
        Search for name from iptorrents
        """

        categories = config.get('category', 'all')
        # Make sure categories is a list
        if not isinstance(categories, list):
            categories = [categories]

        # If there are any text categories, turn them into their id number
        categories = [c if isinstance(c, int) else CATEGORIES[c]
                      for c in categories]
        filter_url = '&'.join((str(c) + '=') for c in categories)

        entries = set()

        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string)
            query = quote_plus(query.encode('utf8'))

            url = "{base_url}/t?{filter}&q={query}&qf=".format(base_url=BASE_URL, filter=filter_url, query=query)
            log.debug('searching with url: %s' % url)
            req = requests.get(url, cookies={'uid': str(config['uid']), 'pass': config['password']})

            if '/u/' + str(config.get('uid')) not in req.text:
                raise plugin.PluginError("Invalid cookies (user not logged in)...")

            soup = get_soup(req.content, parser="html.parser")
            torrents = soup.find('table', {'class': 'torrents'})

            for torrent in torrents.findAll('a', href=re.compile('\.torrent$')):
                entry = Entry()
                entry['url'] = "{base}{link}?torrent_pass={key}".format(
                    base=BASE_URL, link=torrent['href'], key=config.get('rss_key'))
                entry['title'] = torrent.findPrevious("a", attrs={'class': 't_title'}).text

                seeders = torrent.findNext('td', {'class': 'ac t_seeders'}).text
                leechers = torrent.findNext('td', {'class': 'ac t_leechers'}).text
                entry['torrent_seeds'] = int(seeders)
                entry['torrent_leeches'] = int(leechers)
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'],
                                                            entry['torrent_leeches'])

                size = torrent.findNext(text=re.compile('^([\.\d]+) ([GMK]?)B$'))
                size = re.search('^([\.\d]+) ([GMK]?)B$', size)
                if size:
                    if size.group(2) == 'G':
                        entry['content_size'] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
                    elif size.group(2) == 'M':
                        entry['content_size'] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
                    elif size.group(2) == 'K':
                        entry['content_size'] = int(float(size.group(1)) * 1000 / 1024 ** 2)
                    else:
                        entry['content_size'] = int(float(size.group(1)) / 1024 ** 2)
                entries.add(entry)

        return entries
Example No. 18
    def entries_from_search(self, name, url=None):
        """Parses torrent download url from search results"""
        name = normalize_unicode(name)
        if not url:
            url = 'http://www.newtorrents.info/search/%s' % quote(
                name.encode('utf-8'), safe=b':/~?=&%')

        log.debug('search url: %s' % url)

        html = requests.get(url).text
        # fix </SCR'+'IPT> so that BS does not crash
        # TODO: should use beautifulsoup massage
        html = re.sub(r'(</SCR.*?)...(.*?IPT>)', r'\1\2', html)

        soup = get_soup(html)
        # saving torrents in dict
        torrents = []
        for link in soup.find_all('a', attrs={'href': re.compile('down.php')}):
            torrent_url = 'http://www.newtorrents.info%s' % link.get('href')
            release_name = link.parent.next.get('title')
            # quick dirty hack
            seed = link.find_next('td', attrs={
                'class': re.compile('s')
            }).renderContents()
            if seed == 'n/a':
                seed = 0
            else:
                try:
                    seed = int(seed)
                except ValueError:
                    log.warning(
                        'Error converting seed value (%s) from newtorrents to integer.'
                        % seed)
                    seed = 0

            # TODO: also parse content_size and peers from results
            torrents.append(
                Entry(title=release_name,
                      url=torrent_url,
                      torrent_seeds=seed,
                      search_sort=torrent_availability(seed, 0)))
        # sort by seed count in descending order
        torrents.sort(reverse=True, key=lambda x: x.get('search_sort', 0))
        # choose the torrent
        if not torrents:
            dashindex = name.rfind('-')
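            # No matches: retry with everything after the last dash stripped (often a release-group suffix)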
            if dashindex != -1:
                return self.entries_from_search(name[:dashindex])
            else:
                return torrents
        else:
            if len(torrents) == 1:
                log.debug('found only one matching search result.')
            else:
                log.debug(
                    'search result contains multiple matches, sorted %s by most seeders'
                    % torrents)
            return torrents
Example No. 19
    def search(self, task, entry, config=None):
        """
        Search for name from torrentday.
        """

        categories = config.get('category', 'all')
        # Make sure categories is a list
        if not isinstance(categories, list):
            categories = [categories]
        # If there are any text categories, turn them into their id number
        categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
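        # All selected category ids are joined into a single 'c<id>,<id>,...' parameter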
        params = { 'cata': 'yes', 'c%s' % ','.join(str(c) for c in categories): 1, 'clear-new': 1}
        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):

            url = 'https://www.torrentday.com/browse.php'
            params['search'] = normalize_unicode(search_string).replace(':', '')
            cookies = { 'uid': config['uid'], 'pass': config['passkey'], '__cfduid': config['cfduid'] }

            try:
                page = requests.get(url, params=params, cookies=cookies).content
            except RequestException as e:
                raise PluginError('Could not connect to torrentday: %s' % e)

            soup = get_soup(page)

            for tr in soup.find_all('tr', { 'class': 'browse' }):
                entry = Entry()
                # find the torrent names
                title = tr.find('a', { 'class': 'torrentName' })
                entry['title'] = title.contents[0]
                log.debug('title: %s', title.contents[0])

                # find download link
                torrent_url = tr.find('td', { 'class': 'dlLinksInfo' })
                torrent_url = torrent_url.find('a').get('href')

                # construct download URL
                torrent_url = ('https://www.torrentday.com/' + torrent_url + '?torrent_pass=' + config['rss_key'])
                log.debug('RSS-ified download link: %s', torrent_url)
                entry['url'] = torrent_url

                # use tr object for seeders/leechers
                seeders, leechers = tr.find_all('td', { 'class': ['seedersInfo', 'leechersInfo']})
                entry['torrent_seeds'] = int(seeders.contents[0].replace(',', ''))
                entry['torrent_leeches'] = int(leechers.contents[0].replace(',', ''))
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])

                # use tr object for size
                size = tr.find('td', text=re.compile('([\.\d]+) ([TGMKk]?)B')).contents[0]
                size = re.search('([\.\d]+) ([TGMKk]?)B', str(size))

                entry['content_size'] = parse_filesize(size.group(0))

                entries.add(entry)

        return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
Example No. 20
    def search(self, task, entry, config):
        search_strings = [normalize_unicode(s).lower() for s in entry.get('search_strings', [entry['title']])]
        entries = set()
        for search_string in search_strings:
            search_string = clean_title(search_string)
            search_string_url_fragment = search_string
            params = {'rss': 1}
            if config.get('verified'):
                search_string_url_fragment += ' verified:1'
            url = 'https://kat.cr/usearch/%s/' % quote(search_string_url_fragment.encode('utf-8'))
            if config.get('category', 'all') != 'all':
                params['category'] = config['category']

            sorters = [{'field': 'time_add', 'sorder': 'desc'},
                       {'field': 'seeders', 'sorder': 'desc'}]
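            # Run the search twice, sorted first by newest additions and then by seeders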
            for sort in sorters:
                params.update(sort)

                log.debug('requesting: %s' % url)
                try:
                    r = task.requests.get(url, params=params, raise_status=False)
                except RequestException as e:
                    log.warning('Search resulted in: %s' % e)
                    continue
                if not r.content:
                    log.debug('No content returned from search.')
                    continue
                elif r.status_code != 200:
                    log.warning('Search returned %s response code' % r.status_code)
                    continue
                rss = feedparser.parse(r.content)

                ex = rss.get('bozo_exception', False)
                if ex:
                    log.warning('Got bozo_exception (bad feed)')
                    continue

                for item in rss.entries:
                    entry = Entry()
                    entry['title'] = item.title

                    if not item.get('enclosures'):
                        log.warning('Could not get url for entry from KAT. Maybe plugin needs updated?')
                        continue
                    entry['url'] = item.enclosures[0]['url']
                    entry['torrent_seeds'] = int(item.torrent_seeds)
                    entry['torrent_leeches'] = int(item.torrent_peers)
                    entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                    entry['content_size'] = int(item.torrent_contentlength) / 1024 / 1024
                    entry['torrent_info_hash'] = item.torrent_infohash

                    entries.add(entry)

                if len(rss.entries) < 25:
                    break

        return entries
Example No. 21
    def prepare_search_query(self, search_string):
        query = normalize_unicode(search_string)
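        # Capture a trailing season/episode designator (e.g. 'S01E02', '1x02' or 'S01') so it can be stripped from the query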
        se = re.findall('((((|S)[\d]+(E|x)[\d]+)|(|S)[\d]+))$', query)[0][0]
        query = re.sub(se, '', query).strip()

        self.se = se
        self.query = query

        return query
Example No. 22
    def search(self, task, entry, config=None):
        """
            Search for entries on PublicHD
        """

        categories = config.get('category', 'all')
        # Ensure categories is a list
        if not isinstance(categories, list):
            categories = [categories]
        # Convert named category to its respective category id number
        categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
        category_url_fragment = '&category=%s' % urllib.quote(';'.join(str(c) for c in categories))

        base_url = 'http://publichd.se/index.php?page=torrents&active=0'

        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string)
            query_url_fragment = '&search=' + urllib.quote(query.encode('utf8'))

            # http://publichd.se/index.php?page=torrents&active=0&category=5;15&search=QUERY
            url = (base_url + category_url_fragment + query_url_fragment)
            log.debug('PublicHD search url: %s' % url)

            page = requests.get(url).content
            soup = get_soup(page)

            for result in soup.findAll('a', href=re.compile('page=torrent-details')):
                entry = Entry()
                entry['title'] = result.text
                # Expand the selection to whole row
                result = result.findPrevious('tr')
                download_url = result.find('a', href=re.compile('\.torrent$'))['href']
                torrent_hash = re.search(r'/([0-9a-fA-F]{5,40})/', download_url).group(1)

                entry['url'] = 'http://publichd.se/download.php?id=%s' % torrent_hash

                seeds, leeches = result.findAll('td', text=re.compile('^\d+$'))
                entry['torrent_seeds'] = int(seeds.text)
                entry['torrent_leeches'] = int(leeches.text)
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                size = result.find("td", text=re.compile('(\d+(?:[.,]\d+)*)\s?([KMG]B)')).text
                size = re.search('(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)

                if size:
                    if size.group(2) == 'GB':
                        entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 3 / 1024 ** 2)
                    elif size.group(2) == 'MB':
                        entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 2 / 1024 ** 2)
                    elif size.group(2) == 'KB':
                        entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 / 1024 ** 2)
                    else:
                        entry['content_size'] = int(float(size.group(1).replace(',', '')) / 1024 ** 2)

                entries.add(entry)

        return entries
Example No. 23
    def search(self, task, entry, config=None):
        """
            Search for entries on PublicHD
        """

        categories = config.get('category', 'all')
        # Ensure categories is a list
        if not isinstance(categories, list):
            categories = [categories]
        # Convert named category to its respective category id number
        categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
        category_url_fragment = '&category=%s' % urllib.parse.quote(';'.join(str(c) for c in categories))

        base_url = 'http://publichd.se/index.php?page=torrents&active=0'

        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string)
            query_url_fragment = '&search=' + urllib.parse.quote(query.encode('utf8'))

            # http://publichd.se/index.php?page=torrents&active=0&category=5;15&search=QUERY
            url = (base_url + category_url_fragment + query_url_fragment)
            log.debug('PublicHD search url: %s' % url)

            page = requests.get(url).content
            soup = get_soup(page)

            for result in soup.findAll('a', href=re.compile('page=torrent-details')):
                entry = Entry()
                entry['title'] = result.text
                # Expand the selection to whole row
                result = result.findPrevious('tr')
                download_url = result.find('a', href=re.compile('\.torrent$'))['href']
                torrent_hash = re.search(r'/([0-9a-fA-F]{5,40})/', download_url).group(1)

                entry['url'] = 'http://publichd.se/download.php?id=%s' % torrent_hash

                seeds, leeches = result.findAll('td', text=re.compile('^\d+$'))
                entry['torrent_seeds'] = int(seeds.text)
                entry['torrent_leeches'] = int(leeches.text)
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                size = result.find("td", text=re.compile('(\d+(?:[.,]\d+)*)\s?([KMG]B)')).text
                size = re.search('(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)

                if size:
                    if size.group(2) == 'GB':
                        entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 3 / 1024 ** 2)
                    elif size.group(2) == 'MB':
                        entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 2 / 1024 ** 2)
                    elif size.group(2) == 'KB':
                        entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 / 1024 ** 2)
                    else:
                        entry['content_size'] = int(float(size.group(1).replace(',', '')) / 1024 ** 2)

                entries.add(entry)

        return entries
Example No. 24
    def search(self, entry, config=None):
        config = self.process_config(config)
        feed = REPUTATIONS[config['reputation']]
        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string +
                                      config.get('extra_terms', ''))
            # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
            url = 'http://torrentz.eu/%s?q=%s' % (
                feed, urllib.quote(query.encode('utf-8')))
            log.debug('requesting: %s' % url)
            try:
                opened = urllib2.urlopen(url)
            except urllib2.URLError as err:
                url = 'http://torrentz.me/%s?q=%s' % (
                    feed, urllib.quote(query.encode('utf-8')))
                log.warning(
                    'torrentz.eu failed, trying torrentz.me. Error: %s' % err)
                try:
                    opened = urllib2.urlopen(url)
                except urllib2.URLError as err:
                    raise plugin.PluginWarning('Error requesting URL: %s' %
                                               err)
            rss = feedparser.parse(opened)

            status = rss.get('status', False)
            if status != 200:
                raise plugin.PluginWarning(
                    'Search result not 200 (OK), received %s %s' %
                    (status, opened.msg))

            ex = rss.get('bozo_exception', False)
            if ex:
                raise plugin.PluginWarning('Got bozo_exception (bad feed)')

            for item in rss.entries:
                m = re.search(
                    r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                    item.description, re.IGNORECASE)
                if not m:
                    log.debug('regexp did not find seeds / peer data')
                    continue

                entry = Entry()
                entry['title'] = item.title
                entry['url'] = item.link
                entry['content_size'] = int(m.group(1))
                entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
                entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
                entry['torrent_info_hash'] = m.group(4).upper()
                entry['search_sort'] = torrent_availability(
                    entry['torrent_seeds'], entry['torrent_leeches'])
                entries.add(entry)

        log.debug('Search got %d results' % len(entries))
        return entries
Example No. 25
    def search(self, task, entry, config=None):
        """
        Search for name from fuzer.
        """
        self.rss_key = config['rss_key']
        username = config['username']
        password = hashlib.md5(config['password'].encode('utf-8')).hexdigest()

        # build the form request:
        data = {'cookieuser': '******',
                'do': 'login',
                's': '',
                'securitytoken': 'guest',
                'vb_login_username': username,
                'vb_login_password': '',
                'vb_login_md5password': password,
                'vb_login_md5password_utf': password
                }
        # POST the login form:
        try:
            login = requests.post('https://www.fuzer.me/login.php?do=login', data=data)
        except RequestException as e:
            raise PluginError('Could not connect to fuzer: %s' % str(e))

        login_check_phrases = ['ההתחברות נכשלה', 'banned']
        if any(phrase in login.text for phrase in login_check_phrases):
            raise PluginError('Login to Fuzer failed, check credentials')

        self.user_id = requests.cookies.get('fzr2userid')
        category = config.get('category', [0])
        # Make sure categories is a list
        if not isinstance(category, list):
            category = [category]

        # If there are any text categories, turn them into their id number
        categories = [c if isinstance(c, int) else CATEGORIES[c] for c in category]

        c_list = []
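        # Build 'c[]=<id>' query fragments, with the brackets url-encoded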
        for c in categories:
            c_list.append('c{}={}'.format(quote_plus('[]'), c))

        entries = []
        if entry.get('imdb_id'):
            log.debug('imdb_id {} detected, using in search.'.format(entry['imdb_id']))
            soup = self.get_fuzer_soup(entry['imdb_id'], c_list)
            entries = self.extract_entry_from_soup(soup)
            if entries:
                for e in list(entries):
                    e['imdb_id'] = entry.get('imdb_id')
        else:
            for search_string in entry.get('search_strings', [entry['title']]):
                query = normalize_unicode(search_string).replace(":", "")
                text = quote_plus(query.encode('windows-1255'))
                soup = self.get_fuzer_soup(text, c_list)
                entries += self.extract_entry_from_soup(soup)
        return sorted(entries, reverse=True, key=lambda x: x.get('search_sort')) if entries else []
Exemplo n.º 26
0
    def search(self, task, entry, config):
        """
            Search for entries on RarBG
        """

        categories = config.get('category', 'all')
        # Ensure categories is a list
        if not isinstance(categories, list):
            categories = [categories]
        # Convert named category to its respective category id number
        categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
        category_url_fragment = ';'.join(str(c) for c in categories)

        entries = set()

        token = self.get_token()
        if not token:
            log.error('No token set. Exiting RARBG search.')
            return entries

        params = {'mode': 'search', 'token': token, 'ranked': int(config['ranked']),
                  'min_seeders': config['min_seeders'], 'min_leechers': config['min_leechers'],
                  'sort': config['sorted_by'], 'category': category_url_fragment, 'format': 'json'}

        for search_string in entry.get('search_strings', [entry['title']]):
            params.pop('search_string', None)
            params.pop('search_imdb', None)

            if entry.get('movie_name'):
                params['search_imdb'] = entry.get('imdb_id')
            else:
                query = normalize_unicode(search_string)
                query_url_fragment = query.encode('utf8')
                params['search_string'] = query_url_fragment
                if config['use_tvdb']:
                    plugin.get_plugin_by_name('thetvdb_lookup').instance.lazy_series_lookup(entry)
                    params['search_tvdb'] = entry.get('tvdb_id')
                    log.debug('Using tvdb id %s' % entry.get('tvdb_id'))

            page = requests.get(self.base_url, params=params)
            log.debug('requesting: %s' % page.url)
            try:
                r = page.json()
            except ValueError:
                log.debug(page.text)
                continue

            for result in r:
                e = Entry()

                e['title'] = result.get('f')
                e['url'] = result.get('d')

                entries.add(e)

        return entries
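Only the 'f' and 'd' fields of each JSON result are used above, and the final request URL is what the log.debug('requesting: %s' % page.url) line prints. To preview how the params dict gets serialised onto that URL, requests can do it standalone; the base URL and token below are placeholders, not the plugin's real endpoint:

import requests

# Placeholder values, for illustration only.
params = {'mode': 'search', 'token': 'abc123', 'ranked': 1,
          'sort': 'seeders', 'category': '18;41', 'format': 'json',
          'search_string': 'some show s01e01'}
prepared = requests.Request('GET', 'https://example.org/pubapi_v2.php', params=params).prepare()
print(prepared.url)  # the fully encoded query string that would be requested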
Exemplo n.º 27
0
    def search(self, entry, config):
        search_strings = [normalize_unicode(s).lower() for s in entry.get('search_strings', [entry['title']])]
        entries = set()
        for search_string in search_strings:
            search_string_url_fragment = search_string

            if config.get('verified'):
                search_string_url_fragment += ' verified:1'
            url = 'http://kickass.to/search/%s/?rss=1' % urllib.quote(search_string_url_fragment.encode('utf-8'))
            if config.get('category', 'all') != 'all':
                url += '&category=%s' % config['category']

            sorters = [{'field': 'time_add', 'sorder': 'desc'},
                       {'field': 'seeders', 'sorder': 'desc'}]
            for sort in sorters:
                url += '&field=%(field)s&sorder=%(sorder)s' % sort

                log.debug('requesting: %s' % url)
                rss = feedparser.parse(url)

                status = rss.get('status', False)
                if status == 404:
                    # Kat returns status code 404 when no results found for some reason...
                    log.debug('No results found for search query: %s' % search_string)
                    continue
                elif status not in [200, 301]:
                    log.warning('Search result not 200 (OK), received %s' % status)
                    continue


                ex = rss.get('bozo_exception', False)
                if ex:
                    log.warning('Got bozo_exception (bad feed)')
                    continue

                for item in rss.entries:
                    entry = Entry()
                    entry['title'] = item.title

                    if not item.get('enclosures'):
                        log.warning('Could not get url for entry from KAT. Maybe plugin needs updated?')
                        continue
                    entry['url'] = item.enclosures[0]['url']
                    entry['torrent_seeds'] = int(item.torrent_seeds)
                    entry['torrent_leeches'] = int(item.torrent_peers)
                    entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                    entry['content_size'] = int(item.torrent_contentlength) / 1024 / 1024
                    entry['torrent_info_hash'] = item.torrent_infohash

                    entries.add(entry)

                if len(rss.entries) < 25:
                    break

        return entries
Exemplo n.º 28
0
    def search(self, task, entry, config=None):
        """
            Search for entries on Serienjunkies
        """
        base_url = 'http://serienjunkies.org/search/'
        mull = {
            "Dauer:", "Download:", "Uploader:", u"Größe:", u"Tonhöhe:",
            "Sprache:", "Format:", "HQ-Cover:"
        }
        self.config = task.config.get('searchSerienjunkies') or {}
        self.config.setdefault('hoster', DEFHOS)
        self.config.setdefault('language', DEFLANG)

        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string)
            query_url_fragment = urllib.quote(query.encode('utf8'))

            # http://serienjunkies.org/search/QUERY
            url = (base_url + query_url_fragment)
            log.debug('Serienjunkies search url: %s' % url)

            page = requests.get(url).content
            soup = get_soup(page)
            hoster = self.config['hoster']
            if self.config['language'] == 'english':
                english = True
            else:
                english = None
            for p in soup.find_all('p'):
                entry = Entry()
                if p.strong is not None and p.strong.text not in mull:
                    if english:
                        try:
                            if not p.strong.find(text=re.compile(
                                    "german", flags=re.IGNORECASE)):
                                link = p.find(
                                    text=re.compile(hoster)).find_previous('a')
                                entry['title'] = p.strong.text
                                entry['url'] = link.get('href')
                                entries.add(entry)
                        except Exception:
                            pass
                    else:
                        try:
                            if p.strong.find(text=re.compile(
                                    "german", flags=re.IGNORECASE)):
                                link = p.find(
                                    text=re.compile(hoster)).find_previous('a')
                                entry['title'] = p.strong.text
                                entry['url'] = link.get('href')
                                entries.add(entry)
                        except Exception:
                            pass
        return entries
Exemplo n.º 29
0
    def search(self, task, entry, config):
        """Search interface"""
        self.setup(task, config)

        entries = set()
        params = self.params_from_config(config)
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string)
            params[self._key('search')] = query
            entries.update(self.get_entries(self.search_results(params)))
        return entries
Exemplo n.º 30
0
    def search(self, task, entry, config=None):
        """
        Search for name from piratebay.
        """
        if not isinstance(config, dict):
            config = {}
        self.set_urls(config.get('url', URL))
        sort = SORT.get(config.get('sort_by', 'seeds'))
        if config.get('sort_reverse'):
            sort += 1
        if isinstance(config.get('category'), int):
            category = config['category']
        else:
            category = CATEGORIES.get(config.get('category', 'all'))
        filter_url = '/0/%d/%d' % (sort, category)

        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string)

            # TPB search doesn't like dashes or quotes
            query = query.replace('-', ' ').replace("'", " ")

            # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
            url = '%s/search/%s%s' % (self.url, quote(query.encode('utf-8')), filter_url)
            log.debug('Using %s as piratebay search url' % url)
            page = task.requests.get(url).content
            soup = get_soup(page)
            for link in soup.find_all('a', attrs={'class': 'detLink'}):
                entry = Entry()
                entry['title'] = self.extract_title(link)
                if not entry['title']:
                    log.error('Malformed search result. No title or url found. Skipping.')
                    continue
                href = link.get('href')
                if href.startswith('/'):  # relative link?
                    href = self.url + href
                entry['url'] = href
                tds = link.parent.parent.parent.find_all('td')
                entry['torrent_seeds'] = int(tds[-2].contents[0])
                entry['torrent_leeches'] = int(tds[-1].contents[0])
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                # Parse content_size
                size_text = link.find_next(attrs={'class': 'detDesc'}).get_text()
                if size_text:
                    size = re.search('Size (\d+(\.\d+)?\xa0(?:[PTGMK])?i?B)', size_text)
                    if size:
                        entry['content_size'] = parse_filesize(size.group(1))
                    else:
                        log.error('Malformed search result? Title: "%s", No size? %s', entry['title'], size_text)

                entries.add(entry)

        return sorted(entries, reverse=True, key=lambda x: x.get('search_sort'))
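The parse_filesize() call above hides the unit handling. A rough, self-contained stand-in that turns strings such as '1.2 GiB' or '700 MB' into mebibytes (the function name and the returned unit are assumptions for this sketch; FlexGet's own helper may behave differently):

import re

_UNITS = {'B': 1, 'KB': 1024, 'MB': 1024 ** 2, 'GB': 1024 ** 3,
          'KIB': 1024, 'MIB': 1024 ** 2, 'GIB': 1024 ** 3}

def filesize_to_mib(text):
    """Parse '1.2 GiB', '700 MB', ... and return the size in MiB."""
    m = re.search(r'([\d.,]+)\s*([KMG]?i?B)', text, re.IGNORECASE)
    if not m:
        return None
    value = float(m.group(1).replace(',', ''))
    return value * _UNITS[m.group(2).upper()] / 1024 ** 2

# e.g. filesize_to_mib('Size 1.2 GiB') -> 1228.8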
Exemplo n.º 31
0
    def search(self, arg_entry, config=None):
        """
        Search for name from piratebay.
        """
        if not isinstance(config, dict):
            config = {}
        sort = SORT.get(config.get('sort_by', 'seeds'))
        if config.get('sort_reverse'):
            sort += 1
        if isinstance(config.get('category'), int):
            category = config['category']
        else:
            category = CATEGORIES.get(config.get('category', 'all'))
        filter_url = '/0/%d/%d' % (sort, category)

        entries = set()
        for search_string in arg_entry.get('search_string',
                                           [arg_entry['title']]):
            query = normalize_unicode(search_string)
            # TPB search doesn't like dashes
            query = query.replace('-', ' ')
            # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
            url = 'http://thepiratebay.%s/search/%s%s' % (
                CUR_TLD, urllib.quote(query.encode('utf-8')), filter_url)
            log.debug('Using %s as piratebay search url' % url)
            page = requests.get(url).content
            soup = get_soup(page)
            for link in soup.find_all('a', attrs={'class': 'detLink'}):
                entry = Entry()
                entry['title'] = link.contents[0]
                entry['url'] = 'http://thepiratebay.%s%s' % (CUR_TLD,
                                                             link.get('href'))
                tds = link.parent.parent.parent.find_all('td')
                entry['torrent_seeds'] = int(tds[-2].contents[0])
                entry['torrent_leeches'] = int(tds[-1].contents[0])
                entry['search_sort'] = torrent_availability(
                    entry['torrent_seeds'], entry['torrent_leeches'])
                # Parse content_size
                size = link.find_next(attrs={'class': 'detDesc'}).contents[0]
                size = re.search('Size ([\.\d]+)\xa0([GMK])iB', size)
                if size:
                    if size.group(2) == 'G':
                        entry['content_size'] = int(
                            float(size.group(1)) * 1000**3 / 1024**2)
                    elif size.group(2) == 'M':
                        entry['content_size'] = int(
                            float(size.group(1)) * 1000**2 / 1024**2)
                    else:
                        entry['content_size'] = int(
                            float(size.group(1)) * 1000 / 1024**2)
                entries.add(entry)

        return sorted(
            entries, reverse=True, key=lambda x: x.get('search_sort'))
Exemplo n.º 32
0
    def search(self, entry, config):
        # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
        name = normalize_unicode(entry['title'])
        optionlist = [
            'misc', 'movies', 'audio', 'tv', 'games', 'apps', 'pics', 'anime',
            'comics', 'books', 'music video', 'unclassified', 'all'
        ]
        url = 'http://isohunt.com/js/rss/%s?iht=%s&noSL' % (urllib.quote(
            name.encode('utf-8')), optionlist.index(config))

        log.debug('requesting: %s' % url)
        rss = feedparser.parse(url)
        entries = []

        status = rss.get('status', False)
        if status != 200:
            raise PluginWarning('Search result not 200 (OK), received %s' %
                                status)

        ex = rss.get('bozo_exception', False)
        if ex:
            raise PluginWarning('Got bozo_exception (bad feed)')

        for item in rss.entries:
            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link

            m = re.search(r'Size: ([\d]+).*Seeds: (\d+).*Leechers: (\d+)',
                          item.description, re.IGNORECASE)
            if not m:
                log.debug('regexp did not find seeds / peer data')
                continue
            else:
                log.debug('regexp found size(%s), Seeds(%s) and Leeches(%s)' %
                          (m.group(1), m.group(2), m.group(3)))

                entry['content_size'] = int(m.group(1))
                entry['torrent_seeds'] = int(m.group(2))
                entry['torrent_leeches'] = int(m.group(3))
                entry['search_sort'] = torrent_availability(
                    entry['torrent_seeds'], entry['torrent_leeches'])

            entries.append(entry)
        # choose torrent
        if not entries:
            raise PluginWarning('No close matches for %s' % name,
                                log,
                                log_once=True)

        entries.sort(reverse=True, key=lambda x: x.get('search_sort'))

        return entries
Exemplo n.º 33
0
    def search(self, task, entry, config=None):
        config = self.prepare_config(config)

        if not session.cookies:
            log.debug('Logging in to %s...' % URL)
            params = {
                'username': config['username'],
                'password': config['password'],
                'keeplogged': '1',
                'login': '******'
            }
            session.post(URL + 'login.php', data=params)

        cat = ''.join([
            '&' + ('filter_cat[%s]' % id) + '=1' for id in config['category']
        ])
        rls = 'release_type=' + config['type']
        url_params = rls + cat
        multip = config['gravity_multiplier']

        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            srch = normalize_unicode(clean_title(search_string))
            srch = '&searchstr=' + quote(srch.encode('utf8'))

            url = URL + 'torrents.php?' + url_params + srch
            log.debug('Fetching URL for `%s`: %s' % (search_string, url))

            page = session.get(url).content
            soup = get_soup(page)

            for result in soup.findAll('tr', attrs={'class': 'torrent'}):
                entry = Entry()
                entry['title'] = result.find('span',
                                             attrs={
                                                 'class': 'torrent_name_link'
                                             }).text
                entry['url'] = URL + result.find(
                    'a', href=re.compile(
                        'torrents\.php\?action=download')).get('href')
                entry['torrent_seeds'], entry['torrent_leeches'] = [
                    int(r.text.replace(',', '')) for r in result.findAll('td')[-2:]
                ]
                entry['search_sort'] = torrent_availability(
                    entry['torrent_seeds'], entry['torrent_leeches']) * multip

                size = result.findAll('td')[-4].text
                size = re.search('(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)

                entry['content_size'] = parse_filesize(size.group(0))

                entries.add(entry)
        return entries
Exemplo n.º 34
0
    def search(self, task, entry, config=None):
        if not config:
            log.debug('Divxatope disabled')
            return set()
        log.debug('Search DivxATope')
        url_search = 'http://divxatope1.com/buscar/descargas'
        results = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string)
            query = re.sub(' \(\d\d\d\d\)$', '', query)
            log.debug('Searching DivxATope %s' % query)
            query = query.encode('utf8', 'ignore')
            data = {'q': query}
            try:
                response = task.requests.post(url_search, data=data)
            except requests.RequestException as e:
                log.error('Error searching DivxATope: %s' % e)
                return results
            content = response.content

            soup = get_soup(content)
            if 'divxatope1.com' in url_search:
                soup2 = soup.find('ul', attrs={'class': 'buscar-list'})
            else:
                soup2 = soup.find('ul', attrs={'class': 'peliculas-box'})
            children = soup2.findAll('a', href=True)
            for child in children:
                entry = Entry()
                entry['url'] = child['href']
                entry_title = child.find('h2')
                if entry_title is None:
                    continue
                entry_title = entry_title.contents
                if not entry_title:
                    continue
                else:
                    entry_title = entry_title[0]
                quality_lan = child.find('strong')
                if quality_lan is None:
                    continue
                quality_lan = quality_lan.contents
                if len(quality_lan) > 2:
                    if isinstance(quality_lan[0], Tag):
                        entry_quality_lan = quality_lan[1]
                    else:
                        entry_quality_lan = quality_lan[0] + ' ' + quality_lan[2]
                elif len(quality_lan) == 2:
                    entry_quality_lan = quality_lan[1]
                else:
                    # Unexpected quality/language markup, skip this result
                    continue
                entry['title'] = entry_title + ' ' + entry_quality_lan
                results.add(entry)
        log.debug('Finish search DivxATope with %d entries' % len(results))
        return results
Exemplo n.º 35
0
    def search(self, task, entry, config=None):
        if not config:
            log.debug('Divxatope disabled')
            return set()
        log.debug('Search DivxATope')
        url_search = 'http://divxatope1.com/buscar/descargas'
        results = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string)
            query = re.sub(' \(\d\d\d\d\)$', '', query)
            log.debug('Searching DivxATope %s' % query)
            query = query.encode('utf8', 'ignore')
            data = {'q': query}
            try:
                response = task.requests.post(url_search, data=data)
            except requests.RequestException as e:
                log.error('Error searching DivxATope: %s' % e)
                return results
            content = response.content

            soup = get_soup(content)
            if 'divxatope1.com' in url_search:
                soup2 = soup.find('ul', attrs={'class': 'buscar-list'})
            else:
                soup2 = soup.find('ul', attrs={'class': 'peliculas-box'})
            children = soup2.findAll('a', href=True)
            for child in children:
                entry = Entry()
                entry['url'] = child['href']
                entry_title = child.find('h2')
                if entry_title is None:
                    continue
                entry_title = entry_title.contents
                if not entry_title:
                    continue
                else:
                    entry_title = entry_title[0]
                quality_lan = child.find('strong')
                if quality_lan is None:
                    continue
                quality_lan = quality_lan.contents
                if len(quality_lan) > 2:
                    if isinstance(quality_lan[0], Tag):
                        entry_quality_lan = quality_lan[1]
                    else:
                        entry_quality_lan = quality_lan[0] + ' ' + quality_lan[2]
                elif len(quality_lan) == 2:
                    entry_quality_lan = quality_lan[1]
                else:
                    # Unexpected quality/language markup, skip this result
                    continue
                entry['title'] = entry_title + ' ' + entry_quality_lan
                results.add(entry)
        log.debug('Finish search DivxATope with %d entries' % len(results))
        return results
Exemplo n.º 36
0
    def search(self, task, entry, config=None):
        config = self.prepare_config(config)

        if not session.cookies:
            log.debug('Logging in to %s...' % URL)
            params = {
                'username': config['username'],
                'password': config['password'],
                'keeplogged': '1',
                'login': '******'
            }
            session.post(URL + 'login.php', data=params)

        cat = ''.join(['&' + ('filter_cat[%s]' % id) + '=1' for id in config['category']])
        rls = 'release_type=' + config['type']
        url_params = rls + cat
        multip = config['gravity_multiplier']

        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            srch = normalize_unicode(clean_title(search_string))
            srch = '&searchstr=' + quote(srch.encode('utf8'))

            url = URL + 'torrents.php?' + url_params + srch
            log.debug('Fetching URL for `%s`: %s' % (search_string, url))

            page = session.get(url).content
            soup = get_soup(page)

            for result in soup.findAll('tr', attrs={'class': 'torrent'}):
                entry = Entry()
                entry['title'] = result.find('span', attrs={'class': 'torrent_name_link'}).text
                entry['url'] = URL + result.find('a', href=re.compile('torrents\.php\?action=download')).get('href')
                entry['torrent_seeds'], entry['torrent_leeches'] = [int(r.text.replace(',', '')) for r in result.findAll('td')[-2:]]
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches']) * multip

                size = result.findAll('td')[-4].text
                size = re.search('(\d+(?:[.,]\d+)*)\s?([KMG]B)', size)

                if size:
                    if size.group(2) == 'GB':
                        entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 3 / 1024 ** 2)
                    elif size.group(2) == 'MB':
                        entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 ** 2 / 1024 ** 2)
                    elif size.group(2) == 'KB':
                        entry['content_size'] = int(float(size.group(1).replace(',', '')) * 1000 / 1024 ** 2)
                    else:
                        entry['content_size'] = int(float(size.group(1).replace(',', '')) / 1024 ** 2)

                entries.add(entry)
        return entries
Exemplo n.º 37
0
    def entries_from_search(self, name, url=None):
        """Parses torrent download url from search results"""
        name = normalize_unicode(name)
        if not url:
            url = "http://www.newtorrents.info/search/%s" % urllib.quote(name.encode("utf-8"), safe=b":/~?=&%")

        log.debug("search url: %s" % url)

        html = urlopener(url, log).read()
        # fix </SCR'+'IPT> so that BS does not crash
        # TODO: should use beautifulsoup massage
        html = re.sub(r"(</SCR.*?)...(.*?IPT>)", r"\1\2", html)

        soup = get_soup(html)
        # saving torrents in dict
        torrents = []
        for link in soup.find_all("a", attrs={"href": re.compile("down.php")}):
            torrent_url = "http://www.newtorrents.info%s" % link.get("href")
            release_name = link.parent.next.get("title")
            # quick dirty hack
            seed = link.find_next("td", attrs={"class": re.compile("s")}).renderContents()
            if seed == "n/a":
                seed = 0
            else:
                try:
                    seed = int(seed)
                except ValueError:
                    log.warning("Error converting seed value (%s) from newtorrents to integer." % seed)
                    seed = 0

            # TODO: also parse content_size and peers from results
            torrents.append(
                Entry(
                    title=release_name, url=torrent_url, torrent_seeds=seed, search_sort=torrent_availability(seed, 0)
                )
            )
        # sort with seed number Reverse order
        torrents.sort(reverse=True, key=lambda x: x.get("search_sort", 0))
        # choose the torrent
        if not torrents:
            dashindex = name.rfind("-")
            if dashindex != -1:
                return self.entries_from_search(name[:dashindex])
            else:
                return torrents
        else:
            if len(torrents) == 1:
                log.debug("found only one matching search result.")
            else:
                log.debug("search result contains multiple matches, sorted %s by most seeders" % torrents)
            return torrents
Exemplo n.º 38
0
    def search(self, task, entry, config=None):
        config = self.process_config(config)
        feed = REPUTATIONS[config['reputation']]
        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string + config.get('extra_terms', ''))
            for domain in ['eu', 'me', 'ch', 'in']:
                # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
                url = 'http://torrentz.%s/%s?q=%s' % (domain, feed, quote(query.encode('utf-8')))
                log.debug('requesting: %s' % url)
                try:
                    r = task.requests.get(url)
                    break
                except requests.ConnectionError as err:
                    # The different domains all resolve to the same ip, so only try more if it was a dns error
                    log.warning('torrentz.%s connection failed. Error: %s' % (domain, err))
                    continue
                except requests.RequestException as err:
                    raise plugin.PluginError('Error getting torrentz search results: %s' % err)

            else:
                raise plugin.PluginError('Error getting torrentz search results')

            if not r.content.strip():
                raise plugin.PluginError('No data from %s. Maybe torrentz is blocking the FlexGet User-Agent' % url)

            rss = feedparser.parse(r.content)

            if rss.get('bozo_exception'):
                raise plugin.PluginError('Got bozo_exception (bad rss feed)')

            for item in rss.entries:
                m = re.search(r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                              item.description, re.IGNORECASE)
                if not m:
                    log.debug('regexp did not find seeds / peer data')
                    continue

                entry = Entry()
                entry['title'] = item.title
                entry['url'] = item.link
                entry['content_size'] = int(m.group(1))
                entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
                entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
                entry['torrent_info_hash'] = m.group(4).upper()
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                entries.add(entry)

        log.debug('Search got %d results' % len(entries))
        return entries
Exemplo n.º 39
0
    def search(self, entry, config=None):
        if config:
            feed = REPUTATIONS[config]
        else:
            feed = REPUTATIONS['good']
        query = normalize_unicode(entry['title'])
        # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
        url = 'http://torrentz.eu/%s?q=%s' % (
            feed, urllib.quote(query.encode('utf-8')))
        log.debug('requesting: %s' % url)
        rss = feedparser.parse(url)
        entries = []

        status = rss.get('status', False)
        if status != 200:
            raise PluginWarning('Search result not 200 (OK), received %s' %
                                status)

        ex = rss.get('bozo_exception', False)
        if ex:
            raise PluginWarning('Got bozo_exception (bad feed)')

        for item in rss.entries:
            m = re.search(
                r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                item.description, re.IGNORECASE)
            if not m:
                log.debug('regexp did not find seeds / peer data')
                continue

            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['content_size'] = int(m.group(1))
            entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
            entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
            entry['torrent_info_hash'] = m.group(4).upper()
            entry['search_sort'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches'])
            entries.append(entry)

        # choose torrent
        if not entries:
            raise PluginWarning('No close matches for %s' % query,
                                log,
                                log_once=True)

        entries.sort(reverse=True, key=lambda x: x.get('search_sort'))
        log.debug('Search got %d results' % len(entries))
        return entries
Exemplo n.º 40
0
    def search(self, task, entry, config=None):
        """
        Search for name from piratebay.
        """
        if not isinstance(config, dict):
            config = {}
        sort = SORT.get(config.get("sort_by", "seeds"))
        if config.get("sort_reverse"):
            sort += 1
        if isinstance(config.get("category"), int):
            category = config["category"]
        else:
            category = CATEGORIES.get(config.get("category", "all"))
        filter_url = "/0/%d/%d" % (sort, category)

        entries = set()
        for search_string in entry.get("search_strings", [entry["title"]]):
            query = normalize_unicode(search_string)
            # TPB search doesn't like dashes
            query = query.replace("-", " ")
            # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
            url = "http://thepiratebay.%s/search/%s%s" % (CUR_TLD, quote(query.encode("utf-8")), filter_url)
            log.debug("Using %s as piratebay search url" % url)
            page = requests.get(url).content
            soup = get_soup(page)
            for link in soup.find_all("a", attrs={"class": "detLink"}):
                entry = Entry()
                entry["title"] = self.extract_title(link)
                if not entry["title"]:
                    log.error("Malformed search result. No title or url found. Skipping.")
                    continue
                entry["url"] = "http://thepiratebay.%s%s" % (CUR_TLD, link.get("href"))
                tds = link.parent.parent.parent.find_all("td")
                entry["torrent_seeds"] = int(tds[-2].contents[0])
                entry["torrent_leeches"] = int(tds[-1].contents[0])
                entry["search_sort"] = torrent_availability(entry["torrent_seeds"], entry["torrent_leeches"])
                # Parse content_size
                size = link.find_next(attrs={"class": "detDesc"}).contents[0]
                size = re.search("Size ([\.\d]+)\xa0([GMK])iB", size)
                if size:
                    if size.group(2) == "G":
                        entry["content_size"] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
                    elif size.group(2) == "M":
                        entry["content_size"] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
                    else:
                        entry["content_size"] = int(float(size.group(1)) * 1000 / 1024 ** 2)
                entries.add(entry)

        return sorted(entries, reverse=True, key=lambda x: x.get("search_sort"))
Exemplo n.º 41
0
    def search(self, entry, config=None):
        config = self.process_config(config)
        feed = REPUTATIONS[config['reputation']]
        entries = set()
        for search_string in entry.get('search_string', [entry['title']]):
            query = normalize_unicode(search_string+config.get('extra_terms', ''))
            # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
            url = 'http://torrentz.eu/%s?q=%s' % (feed, urllib.quote(query.encode('utf-8')))
            log.debug('requesting: %s' % url)
            try:
                opened = urllib2.urlopen(url)
            except urllib2.URLError as err:
                url = 'http://torrentz.me/%s?q=%s' % (feed, urllib.quote(query.encode('utf-8')))
                log.warning('torrentz.eu failed, trying torrentz.me. Error: %s' % err)
                try:
                    opened = urllib2.urlopen(url)
                except urllib2.URLError as err:
                    raise plugin.PluginWarning('Error requesting URL: %s' % err)
            rss = feedparser.parse(opened)

            status = rss.get('status', False)
            if status != 200:
                raise plugin.PluginWarning(
                    'Search result not 200 (OK), received %s %s' %
                    (status, opened.msg))

            ex = rss.get('bozo_exception', False)
            if ex:
                raise plugin.PluginWarning('Got bozo_exception (bad feed)')

            for item in rss.entries:
                m = re.search(r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                              item.description, re.IGNORECASE)
                if not m:
                    log.debug('regexp did not find seeds / peer data')
                    continue

                entry = Entry()
                entry['title'] = item.title
                entry['url'] = item.link
                entry['content_size'] = int(m.group(1))
                entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
                entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
                entry['torrent_info_hash'] = m.group(4).upper()
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                entries.add(entry)

        log.debug('Search got %d results' % len(entries))
        return entries
Exemplo n.º 42
0
    def search(self, entry, config=None):
        config = self.process_config(config)
        feed = REPUTATIONS[config['reputation']]
        entries = set()
        for search_string in entry.get('search_string', [entry['title']]):
            query = normalize_unicode(search_string +
                                      config.get('extra_terms', ''))
            for domain in ['eu', 'me']:
                # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
                url = 'http://torrentz.%s/%s?q=%s' % (
                    domain, feed, urllib.quote(query.encode('utf-8')))
                log.debug('requesting: %s' % url)
                try:
                    r = requests.get(url)
                    break
                except requests.RequestException as err:
                    log.warning('torrentz.%s failed. Error: %s' %
                                (domain, err))
            else:
                raise plugin.PluginWarning(
                    'Error getting torrentz search results')

            rss = feedparser.parse(r.content)

            ex = rss.get('bozo_exception', False)
            if ex:
                raise plugin.PluginWarning('Got bozo_exception (bad feed)')

            for item in rss.entries:
                m = re.search(
                    r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                    item.description, re.IGNORECASE)
                if not m:
                    log.debug('regexp did not find seeds / peer data')
                    continue

                entry = Entry()
                entry['title'] = item.title
                entry['url'] = item.link
                entry['content_size'] = int(m.group(1))
                entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
                entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
                entry['torrent_info_hash'] = m.group(4).upper()
                entry['search_sort'] = torrent_availability(
                    entry['torrent_seeds'], entry['torrent_leeches'])
                entries.add(entry)

        log.debug('Search got %d results' % len(entries))
        return entries
Exemplo n.º 43
0
 def search(self, entry, config=None):
     from flexget.utils.template import environment
     from flexget.manager import manager
     query = entry['title']
     search_string = urllib.quote(normalize_unicode(query).encode('utf-8'))
     rss_plugin = get_plugin_by_name('rss')
     # Create a fake task to pass to the rss plugin input handler
     task = Task(manager, 'search_rss_task', {})
     # Use a copy of the config, so we don't overwrite jinja url when filling in search term
     config = rss_plugin.instance.build_config(config).copy()
     template = environment.from_string(config['url'])
     config['url'] = template.render({'search_term': search_string})
     config['all_entries'] = True
     # TODO: capture some other_fields to try to find seed/peer/content_size numbers?
     return rss_plugin.phase_handlers['input'](task, config)
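The interesting step above is rendering the configured rss url with a Jinja template so that {{ search_term }} is replaced by the already-quoted query before the config is handed to the rss input plugin. A standalone illustration using plain jinja2 rather than FlexGet's preconfigured environment (the feed url is a made-up example):

from jinja2 import Template

config_url = 'http://example.com/rss?search={{ search_term }}'  # placeholder feed url
search_term = 'some%20show%20s01e01'  # already url-quoted, as in the snippet above
print(Template(config_url).render(search_term=search_term))
# -> http://example.com/rss?search=some%20show%20s01e01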
Exemplo n.º 44
0
    def search(self, task, entry, config=None):
        """
            Search for entries on Serienjunkies
        """
        base_url = 'http://serienjunkies.org/search/'
        mull = {"Dauer:", "Download:", "Uploader:", u"Größe:", u"Tonhöhe:", "Sprache:", "Format:", "HQ-Cover:"}
        self.config = task.config.get('searchSerienjunkies') or {}
        self.config.setdefault('hoster', DEFHOS)
        self.config.setdefault('language', DEFLANG)

        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string)
            query_url_fragment = urllib.quote(query.encode('utf8'))

            # http://serienjunkies.org/search/QUERY
            url = (base_url + query_url_fragment)
            log.debug('Serienjunkies search url: %s' % url)

            page = requests.get(url).content
            soup = get_soup(page)
            hoster = self.config['hoster']
            if self.config['language'] == 'english':
                english = True
            else:
                english = None
            for p in soup.find_all('p'):
                entry = Entry()
                if p.strong is not None and p.strong.text not in mull:
                    if english:
                        try:
                            if not p.strong.find(text=re.compile("german", flags=re.IGNORECASE)):
                                link = p.find(text=re.compile(hoster)).find_previous('a')
                                entry['title'] = p.strong.text
                                entry['url'] = link.get('href')
                                entries.add(entry)
                        except Exception:
                            pass
                    else:
                        try:
                            if p.strong.find(text=re.compile("german", flags=re.IGNORECASE)):
                                link = p.find(text=re.compile(hoster)).find_previous('a')
                                entry['title'] = p.strong.text
                                entry['url'] = link.get('href')
                                entries.add(entry)
                        except Exception:
                            pass
        return entries
Exemplo n.º 45
0
    def entries_from_search(self, name, url=None):
        """Parses torrent download url from search results"""
        name = normalize_unicode(name)
        if not url:
            url = 'http://www.newtorrents.info/search/%s' % urllib.quote(name.encode('utf-8'), safe=':/~?=&%')

        log.debug('search url: %s' % url)

        html = urlopener(url, log).read()
        # fix </SCR'+'IPT> so that BS does not crash
        # TODO: should use beautifulsoup massage
        html = re.sub(r'(</SCR.*?)...(.*?IPT>)', r'\1\2', html)

        soup = get_soup(html)
        # saving torrents in dict
        torrents = []
        for link in soup.find_all('a', attrs={'href': re.compile('down.php')}):
            torrent_url = 'http://www.newtorrents.info%s' % link.get('href')
            release_name = link.parent.next.get('title')
            # quick dirty hack
            seed = link.find_next('td', attrs={'class': re.compile('s')}).renderContents()
            if seed == 'n/a':
                seed = 0
            else:
                try:
                    seed = int(seed)
                except ValueError:
                    log.warning('Error converting seed value (%s) from newtorrents to integer.' % seed)
                    seed = 0

            #TODO: also parse content_size and peers from results
            torrents.append(Entry(title=release_name, url=torrent_url, torrent_seeds=seed,
                                  search_sort=torrent_availability(seed, 0)))
        # sort with seed number Reverse order
        torrents.sort(reverse=True, key=lambda x: x.get('search_sort', 0))
        # choose the torrent
        if not torrents:
            dashindex = name.rfind('-')
            if dashindex != -1:
                return self.entries_from_search(name[:dashindex])
            else:
                raise PluginWarning('No matches for %s' % name, log, log_once=True)
        else:
            if len(torrents) == 1:
                log.debug('found only one matching search result.')
            else:
                log.debug('search result contains multiple matches, sorted %s by most seeders' % torrents)
            return torrents
Exemplo n.º 46
0
    def search(self, entry, config=None):
        config = self.process_config(config)
        feed = REPUTATIONS[config["reputation"]]
        entries = set()
        for search_string in entry.get("search_strings", [entry["title"]]):
            query = normalize_unicode(search_string + config.get("extra_terms", ""))
            for domain in ["eu", "me"]:
                # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
                url = "http://torrentz.%s/%s?q=%s" % (domain, feed, urllib.quote(query.encode("utf-8")))
                log.debug("requesting: %s" % url)
                try:
                    r = requests.get(url, headers={"User-Agent": "FlexGet/%s" % flexget.__version__})
                    break
                except requests.RequestException as err:
                    log.warning("torrentz.%s failed. Error: %s" % (domain, err))
            else:
                raise plugin.PluginError("Error getting torrentz search results")

            if not r.content.strip():
                raise plugin.PluginError("No data from %s. Maybe torrentz is blocking the FlexGet User-Agent" % url)

            rss = feedparser.parse(r.content)

            if rss.get("bozo_exception"):
                raise plugin.PluginError("Got bozo_exception (bad rss feed)")

            for item in rss.entries:
                m = re.search(
                    r"Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)",
                    item.description,
                    re.IGNORECASE,
                )
                if not m:
                    log.debug("regexp did not find seeds / peer data")
                    continue

                entry = Entry()
                entry["title"] = item.title
                entry["url"] = item.link
                entry["content_size"] = int(m.group(1))
                entry["torrent_seeds"] = int(m.group(2).replace(",", ""))
                entry["torrent_leeches"] = int(m.group(3).replace(",", ""))
                entry["torrent_info_hash"] = m.group(4).upper()
                entry["search_sort"] = torrent_availability(entry["torrent_seeds"], entry["torrent_leeches"])
                entries.add(entry)

        log.debug("Search got %d results" % len(entries))
        return entries
Exemplo n.º 47
0
    def search(self, entry, config):
        search_strings = [
            normalize_unicode(s).lower()
            for s in entry.get('search_strings', [entry['title']])
        ]
        entries = set()
        for search_string in search_strings:
            if config.get('verified'):
                search_string += ' verified:1'
            url = 'http://kickass.to/search/%s/?rss=1' % urllib.quote(
                search_string.encode('utf-8'))
            if config.get('category', 'all') != 'all':
                url += '&category=%s' % config['category']

            log.debug('requesting: %s' % url)
            rss = feedparser.parse(url)

            status = rss.get('status', False)
            if status != 200:
                raise plugin.PluginWarning(
                    'Search result not 200 (OK), received %s' % status)

            ex = rss.get('bozo_exception', False)
            if ex:
                raise plugin.PluginWarning('Got bozo_exception (bad feed)')

            for item in rss.entries:
                entry = Entry()
                entry['title'] = item.title

                if not item.get('enclosures'):
                    log.warning(
                        'Could not get url for entry from KAT. Maybe plugin needs updated?'
                    )
                    continue
                entry['url'] = item.enclosures[0]['url']
                entry['torrent_seeds'] = int(item.torrent_seeds)
                entry['torrent_leeches'] = int(item.torrent_peers)
                entry['search_sort'] = torrent_availability(
                    entry['torrent_seeds'], entry['torrent_leeches'])
                entry['content_size'] = int(
                    item.torrent_contentlength) / 1024 / 1024
                entry['torrent_info_hash'] = item.torrent_infohash

                entries.add(entry)

        return entries
Exemplo n.º 48
0
 def url_rewrite(self, task, entry):
     try:
         page = task.requests.get(entry['url'])
     except RequestException as e:
         raise UrlRewritingError(str(e))
     try:
         soup = get_soup(page.text)
     except Exception as e:
         raise UrlRewritingError(str(e))
     link_elements = soup.find_all('pre', class_='links')
     if 'urls' in entry:
         urls = list(entry['urls'])
     else:
         urls = []
     for element in link_elements:
         urls.extend(element.text.splitlines())
     regexps = self.config.get('filehosters_re', [])
     filtered_urls = []
     for i, url in enumerate(urls):
         urls[i] = normalize_unicode(url)
         for regexp in regexps:
             if re.search(regexp, urls[i]):
                 filtered_urls.append(urls[i])
                 log.debug('Url: "%s" matched filehoster filter: %s',
                           urls[i], regexp)
                 break
         else:
             if regexps:
                 log.debug(
                     'Url: "%s" does not match any of the given filehoster filters: %s',
                     urls[i], str(regexps))
     if regexps:
         log.debug('Using filehosters_re filters: %s', str(regexps))
         urls = filtered_urls
     else:
         log.debug(
             'No filehoster filters configured, using all found links.')
     num_links = len(urls)
     log.verbose('Found %d links at %s.', num_links, entry['url'])
     if num_links:
         entry['urls'] = urls
         entry['url'] = urls[0]
     else:
         raise UrlRewritingError('No useable links found at %s' %
                                 entry['url'])
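The filehosters_re handling above boils down to keeping only the links that match at least one configured pattern (and keeping everything when no patterns are configured). The core check, isolated with made-up hoster names and urls:

import re

urls = ['http://uploaded.example/abc',
        'http://rapidgator.example/def',
        'http://other.example/ghi']
filehosters_re = ['uploaded', 'rapidgator']  # hypothetical config values

# keep a url if any configured pattern matches it
filtered = [u for u in urls if any(re.search(rx, u) for rx in filehosters_re)]
print(filtered)  # ['http://uploaded.example/abc', 'http://rapidgator.example/def']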
Exemplo n.º 49
0
 def search(self, task, entry, config=None):
     if not config:
         log.debug('NewPCT disabled')
         return set()
     log.debug('Search NewPCT')
     url_search = 'http://newpct1.com/buscar'
     results = set()
     for search_string in entry.get('search_strings', [entry['title']]):
         query = normalize_unicode(search_string)
         query = re.sub(' \(\d\d\d\d\)$', '', query)
         log.debug('Searching NewPCT %s', query)
         query = unicodedata.normalize('NFD',
                                       query).encode('ascii', 'ignore')
         data = {'q': query}
         try:
             response = task.requests.post(url_search, data=data)
         except requests.RequestException as e:
             log.error('Error searching NewPCT: %s', e)
             return results
         content = response.content
         soup = get_soup(content)
         soup2 = soup.find('ul', attrs={'class': 'buscar-list'})
         children = soup2.findAll('a', href=True)
         for child in children:
             entry = Entry()
             entry['url'] = child['href']
             entry_title = child.find('h2')
             if entry_title is None:
                 log.debug('Ignore empty entry')
                 continue
             entry_title = entry_title.text
             if not entry_title:
                 continue
             try:
                 entry_quality_lan = re.search(
                     '.+ \[([^\]]+)\](\[[^\]]+\])+$', entry_title).group(1)
             except AttributeError:
                 log.debug('Quality not found')
                 continue
             entry_title = re.sub(' \[.+]$', '', entry_title)
             entry['title'] = entry_title + ' ' + entry_quality_lan
             results.add(entry)
     log.debug('Finish search NewPCT with %d entries', len(results))
     return results
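The unicodedata.normalize('NFD', ...).encode('ascii', 'ignore') step above strips accents so the site search receives plain ASCII. Isolated for clarity (Python 3 spelling, example value made up):

import unicodedata

def strip_accents(text):
    """Decompose accented characters and drop the combining marks."""
    return unicodedata.normalize('NFD', text).encode('ascii', 'ignore').decode('ascii')

print(strip_accents('Águila Roja (2009)'))  # -> 'Aguila Roja (2009)'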
Exemplo n.º 50
0
    def search(self, entry, config=None):
        if config:
            feed = REPUTATIONS[config]
        else:
            feed = REPUTATIONS['good']
        query = normalize_unicode(entry['title'])
        # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
        url = 'http://torrentz.eu/%s?q=%s' % (feed, urllib.quote(query.encode('utf-8')))
        log.debug('requesting: %s' % url)
        rss = feedparser.parse(url)
        entries = []

        status = rss.get('status', False)
        if status != 200:
            raise PluginWarning('Search result not 200 (OK), received %s' % status)

        ex = rss.get('bozo_exception', False)
        if ex:
            raise PluginWarning('Got bozo_exception (bad feed)')

        for item in rss.entries:
            m = re.search(r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                          item.description, re.IGNORECASE)
            if not m:
                log.debug('regexp did not find seeds / peer data')
                continue

            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['content_size'] = int(m.group(1))
            entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
            entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
            entry['torrent_info_hash'] = m.group(4).upper()
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            entries.append(entry)

        # choose torrent
        if not entries:
            raise PluginWarning('No close matches for %s' % query, log, log_once=True)

        entries.sort(reverse=True, key=lambda x: x.get('search_sort'))
        log.debug('Search got %d results' % len(entries))
        return entries
Exemplo n.º 51
0
    def search(self, task, entry, config=None):
        """
        Search for name from fuzer.
        """
        self.rss_key = config['rss_key']
        self.user_id = config['user_id']

        self.cookies = {
            'fzr2lastactivity': '0',
            'fzr2lastvisit': '',
            'fzr2password': config['cookie_password'],
            'fzr2sessionhash': '',
            'fzr2userid': str(self.user_id)
        }

        category = config.get('category', [0])
        # Make sure categories is a list
        if not isinstance(category, list):
            category = [category]

        # If there are any text categories, turn them into their id number
        categories = [
            c if isinstance(c, int) else CATEGORIES[c] for c in category
        ]
        c_list = ['c{}={}'.format(quote_plus('[]'), c) for c in categories]

        entries = []
        if entry.get('imdb_id'):
            log.debug("imdb_id '%s' detected, using in search.",
                      entry['imdb_id'])
            soup = self.get_fuzer_soup(entry['imdb_id'], c_list)
            entries = self.extract_entry_from_soup(soup)
            if entries:
                for e in list(entries):
                    e['imdb_id'] = entry.get('imdb_id')
        else:
            for search_string in entry.get('search_strings', [entry['title']]):
                query = normalize_unicode(search_string).replace(":", "")
                text = quote_plus(query.encode('windows-1255'))
                soup = self.get_fuzer_soup(text, c_list)
                entries += self.extract_entry_from_soup(soup)
        return sorted(entries,
                      reverse=True,
                      key=lambda x: x.get('search_sort')) if entries else []
Exemplo n.º 52
0
    def search(self, entry, config=None):
        config = self.process_config(config)
        feed = REPUTATIONS[config['reputation']]
        entries = set()
        for search_string in entry.get('search_string', [entry['title']]):
            query = normalize_unicode(search_string+config.get('extra_terms', ''))
            for domain in ['eu', 'me']:
                # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
                url = 'http://torrentz.%s/%s?q=%s' % (domain, feed, urllib.quote(query.encode('utf-8')))
                log.debug('requesting: %s' % url)
                try:
                    r = requests.get(url)
                    break
                except requests.RequestException as err:
                    log.warning('torrentz.%s failed. Error: %s' % (domain, err))
            else:
                raise plugin.PluginWarning('Error getting torrentz search results')

            rss = feedparser.parse(r.content)

            ex = rss.get('bozo_exception', False)
            if ex:
                raise plugin.PluginWarning('Got bozo_exception (bad feed)')

            for item in rss.entries:
                m = re.search(r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                              item.description, re.IGNORECASE)
                if not m:
                    log.debug('regexp did not find seeds / peer data')
                    continue

                entry = Entry()
                entry['title'] = item.title
                entry['url'] = item.link
                entry['content_size'] = int(m.group(1))
                entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
                entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
                entry['torrent_info_hash'] = m.group(4).upper()
                entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                entries.add(entry)

        log.debug('Search got %d results' % len(entries))
        return entries
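Each torrentz feed item carries its stats in the item description, which the regexp above picks apart. A self-contained sketch of that parsing step; the sample description is invented to match the pattern, not taken from a real feed:

import re

description = 'Size: 700 Mb Seeds: 1,234 Peers: 56 Hash: 0123456789abcdef0123456789abcdef01234567'

m = re.search(r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
              description, re.IGNORECASE)
if m:
    content_size = int(m.group(1))               # size in megabytes
    seeds = int(m.group(2).replace(',', ''))     # strip thousands separators
    leeches = int(m.group(3).replace(',', ''))
    info_hash = m.group(4).upper()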
Example no. 53
0
    def search(self, task, entry, config=None):
        entries = set()
        search_strings = [normalize_unicode(s) for s in entry.get('search_strings', [entry['title']])]
        for search_string in search_strings:
            url = 'https://yts.am/api/v2/list_movies.json?query_term=%s' % (
                urllib.quote(search_string.encode('utf-8')))

            log.debug('requesting: %s' % url)

            try:
                result = requests.get(url)
                try:
                    data = result.json()
                except ValueError:
                    log.debug('Could not decode json from response: %s', result.text)
                    raise plugin.PluginError('Error getting result from yts.')
            except requests.RequestException as e:
                raise plugin.PluginError('Could not retrieve query from yts (%s)' % e.args[0])
            if data['status'] != 'ok':
                raise plugin.PluginError('Failed to query YTS: status %s' % data['status'])

            try:
                if data['data']['movie_count'] > 0:
                    for item in data['data']['movies']:
                        for torrent in item['torrents']:
                            entry = Entry()
                            entry['title'] = item['title']
                            entry['year'] = item['year']
                            entry['url'] = torrent['url']
                            entry['content_size'] = parse_filesize(str(torrent['size_bytes']) + "b")
                            entry['torrent_seeds'] = torrent['seeds']
                            entry['torrent_leeches'] = torrent['peers']
                            entry['torrent_info_hash'] = torrent['hash']
                            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                            entry['quality'] = torrent['quality']
                            entry['imdb_id'] = item['imdb_code']
                            if entry.isvalid():
                                entries.add(entry)
            except Exception as e:
                log.debug('Invalid return structure from YTS: %s', e)

        log.debug('Search got %d results' % len(entries))
        return entries
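The YTS lookup above walks a nested JSON payload. A sketch of the structure the code expects, with a hand-made sample response (field names are taken from the code, values are invented):

# Hand-made payload mirroring only the fields the plugin reads.
data = {
    'status': 'ok',
    'data': {
        'movie_count': 1,
        'movies': [{
            'title': 'Example Movie',
            'year': 2017,
            'imdb_code': 'tt0000000',
            'torrents': [{
                'url': 'https://yts.am/torrent/download/abcdef',
                'size_bytes': 1073741824,
                'seeds': 120,
                'peers': 30,
                'hash': 'abcdef0123456789abcdef0123456789abcdef01',
                'quality': '1080p',
            }],
        }],
    },
}

if data['status'] == 'ok' and data['data']['movie_count'] > 0:
    for movie in data['data']['movies']:
        for torrent in movie['torrents']:
            print(movie['title'], torrent['quality'], torrent['seeds'], torrent['peers'])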
Example no. 54
0
    def search(self, task, entry, config=None):
        if not isinstance(config, dict):
            config = {}

        category = CATEGORIES.get(config.get('category', 'all'), None)
        category_query = '&cid=%d' % category if category else ''

        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string)

            search_query = '&search=%s' % quote(query.encode('utf-8'))

            url = ('http://extratorrent.cc/rss.xml?type=search%s%s' %
                   (category_query, search_query))

            log.debug('Using %s as extratorrent search url' % url)

            rss = feedparser.parse(url)
            status = rss.get('status', False)
            if status != 200:
                log.debug('Search result not 200 (OK), received %s' % status)
            if not status or status >= 400:
                continue

            for item in rss.entries:
                entry = Entry()
                entry['title'] = item.title
                entry['url'] = item.link
                entry['content_size'] = int(item.size) / 1024 / 1024
                entry['torrent_info_hash'] = item.info_hash

                # feedparser returns seeders/leechers as text, so coerce them explicitly
                try:
                    entry['torrent_seeds'] = int(item.seeders)
                    entry['torrent_leeches'] = int(item.leechers)
                except (AttributeError, TypeError, ValueError):
                    pass

                entries.add(entry)

        return entries
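feedparser hands element text back as strings, so numeric feed fields need an explicit conversion before they can be stored as counts. A minimal illustration with a stubbed item; the attribute names match the feed fields read above, the values are invented:

# Stubbed feed item; feedparser would return text values like these.
class StubItem(object):
    seeders = '15'
    leechers = 'n/a'      # some feeds report missing counts as text
    size = '734003200'    # bytes

item = StubItem()

def to_int(value):
    """Best-effort conversion of a feed field to int, or None if it is not numeric."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return None

seeds = to_int(item.seeders)      # 15
leeches = to_int(item.leechers)   # None, so the entry field is simply left unset
size_mb = int(item.size) // 1024 // 1024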
Example no. 55
0
    def search(self, task, entry, config):
        if not isinstance(config, dict):
            config = {'category': config}
        config.setdefault('category', 'anime eng')
        config.setdefault('filter', 'all')
        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            name = normalize_unicode(search_string)
            url = 'https://www.nyaa.si/?page=rss&q=%s&c=%s&f=%s' % (
                quote(name.encode('utf-8')), CATEGORIES[config['category']],
                FILTERS.index(config['filter']))

            log.debug('requesting: %s' % url)
            rss = feedparser.parse(url)

            status = rss.get('status', False)
            if status != 200:
                log.debug('Search result not 200 (OK), received %s' % status)
            if status >= 400:
                continue

            ex = rss.get('bozo_exception', False)
            if ex:
                log.error('Got bozo_exception (bad feed) on %s' % url)
                continue

            for item in rss.entries:
                entry = Entry()
                entry['title'] = item.title
                entry['url'] = item.link
                entry['torrent_seeds'] = int(item.nyaa_seeders)
                entry['torrent_leeches'] = int(item.nyaa_leechers)
                entry['torrent_info_hash'] = item.nyaa_infohash
                entry['search_sort'] = torrent_availability(
                    entry['torrent_seeds'], entry['torrent_leeches'])
                if item.nyaa_size:
                    entry['content_size'] = parse_filesize(item.nyaa_size)

                entries.add(entry)

        return entries
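The nyaa.si query URL above is assembled from a category code and the index of the chosen filter. A sketch of that assembly with hypothetical CATEGORIES and FILTERS values; the plugin defines the real mappings at module level:

from urllib.parse import quote

# Hypothetical subsets of the plugin's module-level constants.
CATEGORIES = {'all': '0_0', 'anime eng': '1_2'}
FILTERS = ['all', 'filter remakes', 'trusted only']

name = 'Example Show S01E01'
url = 'https://www.nyaa.si/?page=rss&q=%s&c=%s&f=%s' % (
    quote(name.encode('utf-8')), CATEGORIES['anime eng'], FILTERS.index('trusted only'))
# -> https://www.nyaa.si/?page=rss&q=Example%20Show%20S01E01&c=1_2&f=2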
Example no. 56
0
    def search(self, entry, config):
        if not isinstance(config, dict):
            config = {'category': config}
        config.setdefault('category', 'anime')
        config.setdefault('filter', 'all')
        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            name = normalize_unicode(search_string)
            url = 'http://www.nyaa.se/?page=rss&cats=%s&filter=%s&term=%s' % (
                CATEGORIES[config['category']], FILTERS.index(
                    config['filter']), urllib.quote(name.encode('utf-8')))

            log.debug('requesting: %s' % url)
            rss = feedparser.parse(url)

            status = rss.get('status', False)
            if status != 200:
                log.debug('Search result not 200 (OK), received %s' % status)
            if status >= 400:
                continue

            ex = rss.get('bozo_exception', False)
            if ex:
                log.error('Got bozo_exception (bad feed) on %s' % url)
                continue

            for item in rss.entries:

                entry = Entry()
                entry['title'] = item.title
                entry['url'] = item.link
                # TODO: parse seeders, leechers and size from the feed item
                #entry['torrent_seeds'] = int(item.seeds)
                #entry['torrent_leeches'] = int(item.leechs)
                #entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
                #entry['content_size'] = int(item.size) / 1024 / 1024

                entries.add(entry)

        return entries
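The last example leaves seed, leech and size parsing as a TODO because the old nyaa.se feed packed those stats into the item description. A hypothetical completion of that step; the description format shown here is an assumption, so the pattern would need to be checked against the real feed:

import re

# Assumed description format for the retired nyaa.se feed; not verified.
description = '12 seeder(s), 3 leecher(s), 456 download(s) - 233.5 MiB'

m = re.search(r'(\d+) seeder\(s\), (\d+) leecher\(s\), \d+ download\(s\) - ([\d.]+ \w+)', description)
if m:
    torrent_seeds = int(m.group(1))
    torrent_leeches = int(m.group(2))
    content_size_text = m.group(3)   # e.g. '233.5 MiB', suitable for parse_filesize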