def url_rewrite(self, task, entry):
    soup = self._get_soup(task, entry['url'])
    # raw string so the backslash escapes reach the regex engine intact
    link_re = re.compile(r'rarefile\.net.*\.rar$')
    # grab links from the main entry: keep the paragraph with the most matching links
    blog_entry = soup.find('div', class_="entry")
    num_links = 0
    link_list = None
    for paragraph in blog_entry.find_all('p'):
        links = paragraph.find_all('a', href=link_re)
        if len(links) > num_links:
            link_list = links
            num_links = len(links)
    if 'urls' in entry:
        urls = list(entry['urls'])
    else:
        urls = []
    if link_list is not None:
        for link in link_list:
            urls.append(normalize_unicode(link['href']))
    else:
        raise UrlRewritingError('No useable links found at %s' % entry['url'])
    num_links = len(urls)
    log.verbose('Found %d links at %s.', num_links, entry['url'])
    if num_links:
        entry['urls'] = urls
        entry['url'] = urls[0]
    else:
        raise UrlRewritingError('No useable links found at %s' % entry['url'])
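# The rewriter above relies on a _get_soup helper that is not shown here. A minimal
# sketch of what it plausibly looks like, mirroring the fetch-and-parse pattern used
# by the other url_rewrite further down (error types and parser choice are assumptions):
def _get_soup(self, task, url):
    try:
        page = task.requests.get(url)
    except RequestException as e:
        raise UrlRewritingError(str(e))
    try:
        return get_soup(page.text)
    except Exception as e:
        raise UrlRewritingError(str(e))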
def entries_from_search(self, name, url=None):
    """Parses torrent download url from search results"""
    name = normalize_unicode(name)
    if not url:
        url = 'http://www.newtorrents.info/search/%s' % quote(
            name.encode('utf-8'), safe=b':/~?=&%'
        )
    log.debug('search url: %s' % url)
    html = requests.get(url).text
    # fix </SCR'+'IPT> so that BS does not crash
    # TODO: should use beautifulsoup massage
    html = re.sub(r'(</SCR.*?)...(.*?IPT>)', r'\1\2', html)
    soup = get_soup(html)
    # collect result torrents
    torrents = []
    for link in soup.find_all('a', attrs={'href': re.compile('down.php')}):
        torrent_url = 'http://www.newtorrents.info%s' % link.get('href')
        release_name = link.parent.next.get('title')
        # quick dirty hack; renderContents() returns bytes under bs4, so decode
        # before comparing or converting to int
        seed = link.find_next('td', attrs={'class': re.compile('s')}).renderContents().decode()
        if seed == 'n/a':
            seed = 0
        else:
            try:
                seed = int(seed)
            except ValueError:
                log.warning(
                    'Error converting seed value (%s) from newtorrents to integer.' % seed
                )
                seed = 0
        # TODO: also parse content_size and peers from results
        torrents.append(
            Entry(
                title=release_name,
                url=torrent_url,
                torrent_seeds=seed,
                torrent_availability=torrent_availability(seed, 0),
            )
        )
    # sort by availability, highest first
    torrents.sort(reverse=True, key=lambda x: x.get('torrent_availability', 0))
    # if nothing matched, retry with the last dash-separated token dropped
    if not torrents:
        dashindex = name.rfind('-')
        if dashindex != -1:
            return self.entries_from_search(name[:dashindex])
        else:
            return torrents
    else:
        if len(torrents) == 1:
            log.debug('found only one matching search result.')
        else:
            log.debug(
                'search result contains multiple matches, sorted %s by most seeders' % torrents
            )
        return torrents
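# torrent_availability is used throughout these searches to rank results. A sketch
# of the helper, assuming FlexGet's convention of weighting seeders over leechers
# (the exact weights below are an assumption, not taken from this source):
def torrent_availability(seeds, leeches):
    """Score a torrent so results can be sorted by likely availability."""
    return seeds * 2 + leeches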
def search(self, task, entry, config=None):
    from flexget.utils.template import environment

    search_strings = [
        quote(normalize_unicode(s).encode('utf-8'))
        for s in entry.get('search_strings', [entry['title']])
    ]
    rss_plugin = plugin.get_plugin_by_name('rss')
    entries = set()
    rss_config = rss_plugin.instance.build_config(config)
    try:
        template = environment.from_string(rss_config['url'])
    except TemplateSyntaxError as e:
        raise plugin.PluginError('Invalid jinja template as rss url: %s' % e)
    rss_config['all_entries'] = True
    for search_string in search_strings:
        rss_config['url'] = template.render({'search_term': search_string})
        # TODO: capture some other_fields to try to find seed/peer/content_size numbers?
        try:
            results = rss_plugin.phase_handlers['input'](task, rss_config)
        except plugin.PluginError as e:
            log.error('Error attempting to get rss for %s: %s', rss_config['url'], e)
        else:
            entries.update(results)
    return entries
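# A small standalone sketch of the jinja rendering step above: the rss url in the
# config may contain a {{ search_term }} placeholder that is filled per search
# string. The url below is a made-up example, and a plain jinja2 Environment stands
# in for FlexGet's preconfigured flexget.utils.template.environment.
from jinja2 import Environment

template = Environment().from_string('https://example.org/rss?q={{ search_term }}')
print(template.render({'search_term': 'some%20show'}))
# -> https://example.org/rss?q=some%20show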
def prepare_search_query(self, search_string):
    query = normalize_unicode(search_string)
    # split a trailing season/episode token (e.g. 'S02E05', '3x09' or 'S02') off the
    # title; guard against no match, which made the original [0][0] raise IndexError
    matches = re.findall(r'((S?\d+(E|x)\d+)|S?\d+)$', query)
    se = matches[0][0] if matches else ''
    if se:
        # strip the token positionally instead of re.sub, which would treat the
        # matched text as a pattern and could remove an earlier occurrence
        query = query[: -len(se)].strip()
    self.se = se
    self.query = query
    return query
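# Standalone demo of the season/episode split performed by prepare_search_query;
# split_se is a hypothetical helper written for illustration only.
import re

def split_se(query):
    m = re.search(r'(S?\d+(E|x)\d+|S?\d+)$', query)
    se = m.group(0) if m else ''
    return query[: len(query) - len(se)].strip(), se

print(split_se('Some Show S02E05'))  # -> ('Some Show', 'S02E05')
print(split_se('Some Show 3x09'))    # -> ('Some Show', '3x09')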
def search(self, task, entry, config=None):
    config = self.process_config(config)
    feed = REPUTATIONS[config['reputation']]
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string + config.get('extra_terms', ''))
        for domain in ['eu', 'is']:
            # urllib.quote will crash if the unicode string has non ascii characters,
            # so encode in utf-8 beforehand
            url = 'http://torrentz2.%s/%s?f=%s' % (domain, feed, quote(query.encode('utf-8')))
            log.debug('requesting: %s' % url)
            try:
                r = task.requests.get(url)
                break
            except requests.ConnectionError as err:
                # The different domains all resolve to the same ip, so only try
                # more if it was a dns error
                log.warning('torrentz.%s connection failed. Error: %s' % (domain, err))
                continue
            except requests.RequestException as err:
                raise plugin.PluginError('Error getting torrentz search results: %s' % err)
        else:
            raise plugin.PluginError('Error getting torrentz search results')

        if not r.content.strip():
            raise plugin.PluginError(
                'No data from %s. Maybe torrentz is blocking the FlexGet User-Agent' % url
            )
        rss = feedparser.parse(r.content)
        if rss.get('bozo_exception'):
            raise plugin.PluginError('Got bozo_exception (bad rss feed)')
        for item in rss.entries:
            m = re.search(
                r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)',
                item.description,
                re.IGNORECASE,
            )
            if not m:
                log.debug('regexp did not find seeds / peer data')
                continue
            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['content_size'] = int(m.group(1))
            entry['torrent_seeds'] = int(m.group(2).replace(',', ''))
            entry['torrent_leeches'] = int(m.group(3).replace(',', ''))
            entry['torrent_info_hash'] = m.group(4).upper()
            entry['torrent_availability'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']
            )
            entries.add(entry)
    log.debug('Search got %d results' % len(entries))
    return entries
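# Demo of the description regex above on a representative torrentz2 summary string
# (the sample text is illustrative, not captured from the live feed):
import re

desc = 'Size: 700 Mb Seeds: 1,234 Peers: 56 Hash: 0123456789abcdef0123456789abcdef01234567'
m = re.search(
    r'Size: ([\d]+) Mb Seeds: ([,\d]+) Peers: ([,\d]+) Hash: ([a-f0-9]+)', desc, re.IGNORECASE
)
print(int(m.group(1)), int(m.group(2).replace(',', '')), int(m.group(3).replace(',', '')))
# -> 700 1234 56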
def search(self, task, entry, config=None):
    entries = set()
    search_strings = [
        normalize_unicode(s) for s in entry.get('search_strings', [entry['title']])
    ]
    for search_string in search_strings:
        # the original used the py2-only urllib.quote spelling; quote here is
        # urllib.parse.quote, as in the other searches
        url = 'https://yts.am/api/v2/list_movies.json?query_term=%s' % (
            quote(search_string.encode('utf-8'))
        )
        log.debug('requesting: %s' % url)
        try:
            result = requests.get(url)
            try:
                data = result.json()
            except ValueError:
                log.debug('Could not decode json from response: %s', result.text)
                raise plugin.PluginError('Error getting result from yts.')
        except requests.RequestException as e:
            raise plugin.PluginError('Could not retrieve query from yts (%s)' % e.args[0])
        if data['status'] != 'ok':
            raise plugin.PluginError('failed to query YTS')
        try:
            if data['data']['movie_count'] > 0:
                for item in data['data']['movies']:
                    for torrent in item['torrents']:
                        entry = Entry()
                        entry['title'] = item['title']
                        entry['year'] = item['year']
                        entry['url'] = torrent['url']
                        entry['content_size'] = parse_filesize(
                            str(torrent['size_bytes']) + "b"
                        )
                        entry['torrent_seeds'] = torrent['seeds']
                        entry['torrent_leeches'] = torrent['peers']
                        entry['torrent_info_hash'] = torrent['hash']
                        entry['torrent_availability'] = torrent_availability(
                            entry['torrent_seeds'], entry['torrent_leeches']
                        )
                        entry['quality'] = torrent['quality']
                        entry['imdb_id'] = item['imdb_code']
                        if entry.isvalid():
                            entries.add(entry)
        except Exception:
            log.debug('invalid return structure from YTS')
    log.debug('Search got %d results' % len(entries))
    return entries
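# Note on the size handling above: parse_filesize (a FlexGet util) parses strings
# like '734003200 b' or '1.4 GB'. Feeding it the raw byte count suffixed with 'b',
# as done here, presumably yields content_size in FlexGet's usual megabyte unit
# (that unit is an assumption, not stated in this code).
# e.g. parse_filesize('734003200 b') -> roughly 700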
def search(self, task, entry, config):
    if not isinstance(config, dict):
        config = {'category': config}
    config.setdefault('category', 'anime eng')
    config.setdefault('filter', 'all')
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        name = normalize_unicode(search_string)
        url = 'https://www.nyaa.si/?page=rss&q=%s&c=%s&f=%s' % (
            quote(name.encode('utf-8')),
            CATEGORIES[config['category']],
            FILTERS.index(config['filter']),
        )
        log.debug('requesting: %s' % url)
        rss = feedparser.parse(url)
        status = rss.get('status', False)
        if status != 200:
            log.debug('Search result not 200 (OK), received %s' % status)
            if status >= 400:
                continue
        ex = rss.get('bozo_exception', False)
        if ex:
            log.error('Got bozo_exception (bad feed) on %s' % url)
            continue
        for item in rss.entries:
            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['torrent_seeds'] = int(item.nyaa_seeders)
            entry['torrent_leeches'] = int(item.nyaa_leechers)
            entry['torrent_info_hash'] = item.nyaa_infohash
            entry['torrent_availability'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']
            )
            if item.nyaa_size:
                entry['content_size'] = parse_filesize(item.nyaa_size)
            entries.add(entry)
    return entries
def url_rewrite(self, task, entry):
    try:
        page = task.requests.get(entry['url'])
    except RequestException as e:
        raise UrlRewritingError(str(e))
    try:
        soup = get_soup(page.text)
    except Exception as e:
        raise UrlRewritingError(str(e))
    link_elements = soup.find_all('pre', class_='links')
    if 'urls' in entry:
        urls = list(entry['urls'])
    else:
        urls = []
    for element in link_elements:
        urls.extend(element.text.splitlines())
    regexps = self.config.get('filehosters_re', [])
    filtered_urls = []
    for i, url in enumerate(urls):
        urls[i] = normalize_unicode(url)
        for regexp in regexps:
            if re.search(regexp, urls[i]):
                filtered_urls.append(urls[i])
                log.debug('Url: "%s" matched filehoster filter: %s', urls[i], regexp)
                break
        else:
            if regexps:
                log.debug(
                    'Url: "%s" does not match any of the given filehoster filters: %s',
                    urls[i],
                    str(regexps),
                )
    if regexps:
        log.debug('Using filehosters_re filters: %s', str(regexps))
        urls = filtered_urls
    else:
        log.debug('No filehoster filters configured, using all found links.')
    num_links = len(urls)
    log.verbose('Found %d links at %s.', num_links, entry['url'])
    if num_links:
        entry['urls'] = urls
        entry['url'] = urls[0]
    else:
        raise UrlRewritingError('No useable links found at %s' % entry['url'])
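# Standalone demo of the filehosters_re filtering logic above; the urls and the
# patterns are illustrative examples, not values shipped with the plugin.
import re

urls = [
    'https://rapidgator.net/file/abc123',
    'https://example.com/ad/click?i=1',
    'https://nitroflare.com/view/def456',
]
regexps = [r'rapidgator\.net', r'nitroflare\.com']
print([u for u in urls if any(re.search(rx, u) for rx in regexps)])
# -> ['https://rapidgator.net/file/abc123', 'https://nitroflare.com/view/def456']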
def search(self, task, entry, config=None):
    if not config:
        log.debug('Descargas2020 disabled')
        return set()
    log.debug('Search Descargas2020')
    url_search = 'http://descargas2020.com/buscar'
    results = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        query = re.sub(r' \(\d\d\d\d\)$', '', query)
        log.debug('Searching Descargas2020 %s', query)
        query = unicodedata.normalize('NFD', query).encode('ascii', 'ignore')
        data = {'q': query}
        try:
            response = task.requests.post(url_search, data=data)
        except requests.RequestException as e:
            log.error('Error searching Descargas2020: %s', e)
            return results
        content = response.content
        soup = get_soup(content)
        soup2 = soup.find('ul', attrs={'class': 'buscar-list'})
        children = soup2.findAll('a', href=True)
        for child in children:
            entry = Entry()
            entry['url'] = child['href']
            entry_title = child.find('h2')
            if entry_title is None:
                log.debug('Ignore empty entry')
                continue
            entry_title = entry_title.text
            if not entry_title:
                continue
            try:
                entry_quality_lan = re.search(
                    r'.+ \[([^\]]+)\](\[[^\]]+\])+$', entry_title
                ).group(1)
            except AttributeError:
                log.debug('Quality not found')
                continue
            entry_title = re.sub(r' \[.+]$', '', entry_title)
            entry['title'] = entry_title + ' ' + entry_quality_lan
            results.add(entry)
    log.debug('Finish search Descargas2020 with %d entries', len(results))
    return results
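# Demo of the title/quality split above on an illustrative Descargas2020-style title:
import re

title = 'Some Show - Temporada 1 [HDTV 720p][Cap.101][AC3 5.1 Castellano]'
quality = re.search(r'.+ \[([^\]]+)\](\[[^\]]+\])+$', title).group(1)
clean = re.sub(r' \[.+]$', '', title)
print(clean + ' ' + quality)
# -> Some Show - Temporada 1 HDTV 720p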
def search(self, task, entry, config=None):
    if not isinstance(config, dict):
        config = {}
    category = CATEGORIES.get(config.get('category', 'all'), None)
    category_query = '&cid=%d' % category if category else ''
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        search_query = '&search=%s' % quote(query.encode('utf-8'))
        # append category_query; the original computed it but never used it
        url = 'http://extratorrent.cc/rss.xml?type=search%s%s' % (search_query, category_query)
        log.debug('Using %s as extratorrent search url' % url)
        rss = feedparser.parse(url)
        status = rss.get('status', False)
        if status != 200:
            log.debug('Search result not 200 (OK), received %s' % status)
            if not status or status >= 400:
                continue
        for item in rss.entries:
            entry = Entry()
            entry['title'] = item.title
            entry['url'] = item.link
            entry['content_size'] = int(item.size) / 1024 / 1024
            entry['torrent_info_hash'] = item.info_hash
            # feedparser exposes feed fields as strings, so coerce rather than
            # isinstance-check (the original isinstance(..., int) test never matched)
            try:
                entry['torrent_seeds'] = int(item.seeders)
            except (TypeError, ValueError):
                pass
            try:
                entry['torrent_leeches'] = int(item.leechers)
            except (TypeError, ValueError):
                pass
            entries.add(entry)
    return entries
def prepare_search_query(self, search_string):
    # replace_sepcial_chars (sic) is defined elsewhere on this class
    return self.replace_sepcial_chars(normalize_unicode(search_string))
def search(self, task, entry, config=None):
    """
    Search for name from piratebay.
    """
    if not isinstance(config, dict):
        config = {}
    self.set_urls(config.get('url', URL))
    sort = SORT.get(config.get('sort_by', 'seeds'))
    if config.get('sort_reverse'):
        sort += 1
    if isinstance(config.get('category'), int):
        category = config['category']
    else:
        category = CATEGORIES.get(config.get('category', 'all'))
    filter_url = '/0/%d/%d' % (sort, category)
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        # TPB search doesn't like dashes or quotes
        query = query.replace('-', ' ').replace("'", " ")
        # urllib.quote will crash if the unicode string has non ascii characters,
        # so encode in utf-8 beforehand
        url = '%s/search/%s%s' % (self.url, quote(query.encode('utf-8')), filter_url)
        logger.debug('Using {} as piratebay search url', url)
        page = task.requests.get(url).content
        soup = get_soup(page)
        for link in soup.find_all('a', attrs={'class': 'detLink'}):
            entry = Entry()
            entry['title'] = self.extract_title(link)
            if not entry['title']:
                logger.error('Malformed search result. No title or url found. Skipping.')
                continue
            href = link.get('href')
            if href.startswith('/'):  # relative link?
                href = self.url + href
            entry['url'] = href
            row = link.parent.parent.parent
            description = row.find_all('a', attrs={'class': 'detDesc'})
            if description and description[0].contents[0] == "piratebay ":
                logger.debug('Advertisement entry. Skipping.')
                continue
            tds = row.find_all('td')
            entry['torrent_seeds'] = int(tds[-2].contents[0])
            entry['torrent_leeches'] = int(tds[-1].contents[0])
            entry['torrent_availability'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']
            )
            # Parse content_size
            size_text = link.find_next(attrs={'class': 'detDesc'}).get_text()
            if size_text:
                size = re.search(r'Size (\d+(\.\d+)?\xa0(?:[PTGMK])?i?B)', size_text)
                if size:
                    entry['content_size'] = parse_filesize(size.group(1))
                else:
                    logger.error(
                        'Malformed search result? Title: "{}", No size? {}',
                        entry['title'],
                        size_text,
                    )
            entries.add(entry)
    return sorted(entries, reverse=True, key=lambda x: x.get('torrent_availability'))
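# Demo of the detDesc size regex above; \xa0 is the non-breaking space piratebay
# puts between the number and the unit (the detDesc sample line is illustrative):
import re

size_text = 'Uploaded 03-14 2019, Size 1.4\xa0GiB, ULed by someone'
m = re.search(r'Size (\d+(\.\d+)?\xa0(?:[PTGMK])?i?B)', size_text)
print(m.group(1))  # -> '1.4\xa0GiB'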
def search(self, task, entry, config=None):
    """
    Search for name from iptorrents
    """
    categories = config.get('category', 'All')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    category_params = {str(c): '' for c in categories if str(c)}
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        search_params = dict(category_params)
        query = normalize_unicode(search_string)
        search_params.update({'q': query, 'qf': ''})
        logger.debug('searching with params: {}', search_params)
        # the free search endpoint only differs in url; fold both calls into one
        search_url = FREE_SEARCH_URL if config.get('free') else SEARCH_URL
        req = requests.get(
            search_url,
            params=search_params,
            cookies={'uid': str(config['uid']), 'pass': config['password']},
        )
        logger.debug('full search URL: {}', req.url)
        if '/u/' + str(config['uid']) not in req.text:
            raise plugin.PluginError("Invalid cookies (user not logged in)...")
        soup = get_soup(req.content, parser="html.parser")
        torrents = soup.find('table', {'id': 'torrents'})
        results = torrents.findAll('tr')
        for torrent in results:
            if torrent.th and 'ac' in torrent.th.get('class'):
                # Header column
                continue
            if torrent.find('td', {'colspan': '99'}):
                logger.debug('No results found for search {}', search_string)
                break
            entry = Entry()
            link = torrent.find('a', href=re.compile('download'))['href']
            entry['url'] = f"{BASE_URL}{link}?torrent_pass={config.get('rss_key')}"
            entry['title'] = torrent.find('a', href=re.compile('details')).text
            seeders = torrent.findNext('td', {'class': 'ac t_seeders'}).text
            leechers = torrent.findNext('td', {'class': 'ac t_leechers'}).text
            entry['torrent_seeds'] = int(seeders)
            entry['torrent_leeches'] = int(leechers)
            entry['torrent_availability'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']
            )
            size = torrent.findNext(text=re.compile(r'^([\.\d]+) ([GMK]?)B$'))
            size = re.search(r'^([\.\d]+) ([GMK]?)B$', size)
            entry['content_size'] = parse_filesize(size.group(0))
            logger.debug('Found entry {}', entry)
            entries.add(entry)
    return entries
def search(self, task, entry, config=None):
    """
    Search for name from torrentleech.
    """
    request_headers = {'User-Agent': 'curl/7.54.0'}
    rss_key = config['rss_key']

    # build the form request:
    data = {'username': config['username'], 'password': config['password']}
    # POST the login form:
    try:
        login = task.requests.post(
            'https://www.torrentleech.org/user/account/login/',
            data=data,
            headers=request_headers,
            allow_redirects=True,
        )
    except RequestException as e:
        raise PluginError('Could not connect to torrentleech: %s' % str(e))

    if login.url.endswith('/user/account/login/'):
        raise PluginError('Could not login to torrentleech, faulty credentials?')

    if not isinstance(config, dict):
        config = {}
    # sort = SORT.get(config.get('sort_by', 'seeds'))
    # if config.get('sort_reverse'):
    #     sort += 1
    categories = config.get('category', 'all')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    filter_url = '/categories/{}'.format(','.join(str(c) for c in categories))
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string).replace(":", "")
        # urllib.quote will crash if the unicode string has non ascii characters,
        # so encode in utf-8 beforehand
        url = (
            'https://www.torrentleech.org/torrents/browse/list/query/'
            + quote(query.encode('utf-8'))
            + filter_url
        )
        logger.debug('Using {} as torrentleech search url', url)
        results = task.requests.get(url, headers=request_headers, cookies=login.cookies).json()
        for torrent in results['torrentList']:
            entry = Entry()
            entry['download_headers'] = request_headers
            entry['title'] = torrent['name']
            # construct download URL
            torrent_url = 'https://www.torrentleech.org/rss/download/{}/{}/{}'.format(
                torrent['fid'], rss_key, torrent['filename']
            )
            logger.debug('RSS-ified download link: {}', torrent_url)
            entry['url'] = torrent_url
            # seeders/leechers
            entry['torrent_seeds'] = torrent['seeders']
            entry['torrent_leeches'] = torrent['leechers']
            entry['torrent_availability'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']
            )
            entry['content_size'] = parse_filesize(str(torrent['size']) + ' b')
            entries.add(entry)
    return sorted(entries, reverse=True, key=lambda x: x.get('torrent_availability'))
def url_rewrite(self, task, entry):
    soup = self._get_soup(task, entry['url'])

    # grab links from the main post:
    link_elements = []
    log.debug(
        'Searching %s for a tags where the text matches one of: %s',
        entry['url'],
        str(self.config.get('link_text_re')),
    )
    for regexp in self.config.get('link_text_re'):
        link_elements.extend(soup.find_all('a', string=re.compile(regexp)))
    if 'urls' in entry:
        urls = list(entry['urls'])
        log.debug('Original urls: %s', str(entry['urls']))
    else:
        urls = []
    log.debug('Found link elements: %s', str(link_elements))
    for element in link_elements:
        # raw, escaped pattern; the original's bare dots matched any character
        if re.search(r'nfo1\.rlsbb\.(ru|com)', element['href']):
            # grab multipart links
            urls.extend(self.grab_multilinks(task, element['href']))
        else:
            urls.append(element['href'])

    # grab links from comments
    regexps = self.config.get('filehosters_re', [])
    if self.config.get('parse_comments'):
        comments = soup.find_all('div', id=re.compile("commentbody"))
        log.debug('Comment parsing enabled: found %d comments.', len(comments))
        if comments and not regexps:
            log.warning(
                'You have enabled comment parsing but you did not define any '
                'filehoster_re filter. You may get a lot of unwanted and potentially '
                'dangerous links from the comments.'
            )
        for comment in comments:
            links = comment.find_all('a')
            for link in links:
                urls.append(link['href'])

    # filter urls:
    filtered_urls = []
    for i, url in enumerate(urls):
        urls[i] = normalize_unicode(url)
        for regexp in regexps:
            if re.search(regexp, urls[i]):
                filtered_urls.append(urls[i])
                log.debug('Url: "%s" matched filehoster filter: %s', urls[i], regexp)
                break
        else:
            if regexps:
                log.debug(
                    'Url: "%s" was discarded because it does not match any of the '
                    'given filehoster filters: %s',
                    urls[i],
                    str(regexps),
                )
    if regexps:
        log.debug('Using filehosters_re filters: %s', str(regexps))
        urls = filtered_urls
    else:
        log.debug('No filehoster filters configured, using all found links.')
    num_links = len(urls)
    log.verbose('Found %d links at %s.', num_links, entry['url'])
    if num_links:
        entry['urls'] = urls
        entry['url'] = urls[0]
    else:
        raise UrlRewritingError('No useable links found at %s' % entry['url'])
def search(self, task, entry, config=None):
    """
    Search for name from iptorrents
    """
    categories = config.get('category', 'All')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    filter_url = '&'.join((str(c) + '=') for c in categories)
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        query = normalize_unicode(search_string)
        query = quote_plus(query.encode('utf8'))
        url = "{base_url}/t?{filter}&q={query}&qf=".format(
            base_url=BASE_URL, filter=filter_url, query=query
        )
        logger.debug('searching with url: {}', url)
        req = requests.get(
            url, cookies={'uid': str(config['uid']), 'pass': config['password']}
        )
        if '/u/' + str(config['uid']) not in req.text:
            raise plugin.PluginError("Invalid cookies (user not logged in)...")
        soup = get_soup(req.content, parser="html.parser")
        torrents = soup.find('table', {'id': 'torrents'})
        results = torrents.findAll('tr')
        for torrent in results:
            if torrent.th and 'ac' in torrent.th.get('class'):
                # Header column
                continue
            if torrent.find('td', {'colspan': '99'}):
                logger.debug('No results found for search {}', search_string)
                break
            entry = Entry()
            link = torrent.find('a', href=re.compile('download'))['href']
            entry['url'] = "{base}{link}?torrent_pass={key}".format(
                base=BASE_URL, link=link, key=config.get('rss_key')
            )
            entry['title'] = torrent.find('a', href=re.compile('details')).text
            seeders = torrent.findNext('td', {'class': 'ac t_seeders'}).text
            leechers = torrent.findNext('td', {'class': 'ac t_leechers'}).text
            entry['torrent_seeds'] = int(seeders)
            entry['torrent_leeches'] = int(leechers)
            entry['torrent_availability'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']
            )
            size = torrent.findNext(text=re.compile(r'^([\.\d]+) ([GMK]?)B$'))
            size = re.search(r'^([\.\d]+) ([GMK]?)B$', size)
            entry['content_size'] = parse_filesize(size.group(0))
            logger.debug('Found entry {}', entry)
            entries.add(entry)
    return entries
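# Demo of the size-cell regex shared by the iptorrents searches above; the cell
# text is an illustrative example:
import re

cell = '1.4 GB'
m = re.search(r'^([\.\d]+) ([GMK]?)B$', cell)
print(m.group(0), '->', m.group(1), m.group(2))  # 1.4 GB -> 1.4 G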
def search(self, task, entry, config=None):
    """
    Search for name from torrentday.
    """
    categories = config.get('category', 'all')
    # Make sure categories is a list
    if not isinstance(categories, list):
        categories = [categories]
    # If there are any text categories, turn them into their id number
    categories = [c if isinstance(c, int) else CATEGORIES[c] for c in categories]
    params = {
        'cata': 'yes',
        'c{}'.format(','.join(str(c) for c in categories)): 1,
        'clear-new': 1,
    }
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        url = 'https://www.torrentday.com/t'
        params['q'] = normalize_unicode(search_string).replace(':', '')
        cookies = {
            'uid': config['uid'],
            'pass': config['passkey'],
            '__cfduid': config['cfduid'],
        }
        try:
            page = requests.get(url, params=params, cookies=cookies).content
        except RequestException as e:
            raise PluginError('Could not connect to torrentday: {}'.format(e))

        # the following should avoid table being None due to a malformed
        # html in td search results
        soup = get_soup(page).contents[1].contents[1].next.next.nextSibling
        table = soup.find('table', {'id': 'torrentTable'})
        if table is None:
            raise PluginError(
                'Search returned by torrentday appears to be empty or malformed.'
            )

        # the first row is the header so skip it
        for tr in table.find_all('tr')[1:]:
            entry = Entry()
            # find the torrent names
            td = tr.find('td', {'class': 'torrentNameInfo'})
            if not td:
                log.warning('Could not find entry torrentNameInfo for %s.', search_string)
                continue
            title = td.find('a')
            if not title:
                log.warning('Could not determine title for %s.', search_string)
                continue
            entry['title'] = title.contents[0]
            log.debug('title: %s', title.contents[0])

            # find download link
            torrent_url = tr.find('td', {'class': 'ac'})
            if not torrent_url:
                log.warning('Could not determine download link for %s.', search_string)
                continue
            torrent_url = torrent_url.find('a').get('href')

            # construct download URL; the key expression was scrubbed ('******')
            # in the source, appending config['rss_key'] here is an assumption
            # consistent with the plugin's config keys
            torrent_url = (
                'https://www.torrentday.com/' + torrent_url + '?torrent_pass=' + config['rss_key']
            )
            log.debug('RSS-ified download link: %s', torrent_url)
            entry['url'] = torrent_url

            # use tr object for seeders/leechers
            seeders = tr.find('td', {'class': 'ac seedersInfo'})
            leechers = tr.find('td', {'class': 'ac leechersInfo'})
            entry['torrent_seeds'] = int(seeders.contents[0].replace(',', ''))
            entry['torrent_leeches'] = int(leechers.contents[0].replace(',', ''))
            entry['torrent_availability'] = torrent_availability(
                entry['torrent_seeds'], entry['torrent_leeches']
            )

            # use tr object for size
            size = tr.find('td', text=re.compile(r'([\.\d]+) ([TGMKk]?)B')).contents[0]
            size = re.search(r'([\.\d]+) ([TGMKk]?)B', str(size))
            entry['content_size'] = parse_filesize(size.group(0))
            entries.add(entry)
    return sorted(entries, reverse=True, key=lambda x: x.get('torrent_availability'))
def search(self, task, entry, config):
    """CPASBIEN search plugin

    Config example:

    tv_search_cpasbien:
        discover:
            what:
                - trakt_list:
                    username: xxxxxxx
                    api_key: xxxxxxx
                    series: watchlist
            from:
                - cpasbien:
                    category: "series-vostfr"
            interval: 1 day
            ignore_estimations: yes

    Category is ONE of: all, films, series, musique, films-french, 1080p, 720p,
    series-francaise, films-dvdrip, films-vostfr, series-vostfr, ebook
    """
    base_url = 'http://www.cpasbien.io'
    entries = set()
    for search_string in entry.get('search_strings', [entry['title']]):
        search_string = search_string.replace(' ', '-').lower()
        search_string = search_string.replace('(', '')
        search_string = search_string.replace(')', '')
        query = normalize_unicode(search_string)
        query_url_fragment = quote_plus(query.encode('utf-8'))
        # e.g. http://www.cpasbien.io/recherche/ncis.html
        if config['category'] == 'all':
            str_url = (base_url, 'recherche', query_url_fragment)
            url = '/'.join(str_url)
        else:
            category_url_fragment = '%s' % config['category']
            str_url = (base_url, 'recherche', category_url_fragment, query_url_fragment)
            url = '/'.join(str_url)
        log.debug('search url: %s' % (url + '.html'))
        # GET URL
        f = task.requests.get(url + '.html').content
        soup = get_soup(f)
        if soup.findAll(text=re.compile(' 0 torrents')):
            log.debug('search returned no results')
        else:
            nextpage = 0
            while nextpage >= 0:
                if nextpage > 0:
                    newurl = url + '/page-' + str(nextpage)
                    log.debug('-----> NEXT PAGE : %s' % newurl)
                    f1 = task.requests.get(newurl).content
                    soup = get_soup(f1)
                for result in soup.findAll('div', attrs={'class': re.compile('ligne')}):
                    entry = Entry()
                    link = result.find('a', attrs={'href': re.compile('dl-torrent')})
                    entry['title'] = link.contents[0]
                    # rewrite the page link into a direct download link: take the
                    # last path component, drop '.html' and append '.torrent'
                    page_link = link.get('href')
                    link_rewrite = page_link.split('/')
                    endlink = link_rewrite[-1]
                    str_url = (base_url, '/telechargement/', endlink[:-5], '.torrent')
                    entry['url'] = ''.join(str_url)
                    log.debug('Title: %s | DL LINK: %s' % (entry['title'], entry['url']))
                    entry['torrent_seeds'] = int(
                        result.find('span', attrs={'class': re.compile('seed')}).text
                    )
                    entry['torrent_leeches'] = int(
                        result.find('div', attrs={'class': re.compile('down')}).text
                    )
                    size = result.find('div', attrs={'class': re.compile('poid')}).text
                    entry['content_size'] = parse_filesize(size, si=False)
                    if entry['torrent_seeds'] > 0:
                        entries.add(entry)
                    else:
                        log.debug('0 SEED, not adding entry')
                if soup.find(text=re.compile('Suiv')):
                    nextpage += 1
                else:
                    nextpage = -1
    return entries