Example #1
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        last_recent_search = self.last_recent_search
        last_recent_search = '' if not last_recent_search else last_recent_search.replace(
            'id-', '')

        for mode in search_params:
            urls = []
            for search_string in search_params[mode]:
                urls += [[]]
                search_string = unidecode(search_string)
                search_url = self.urls['search'] % (
                    self._categories_string(), '+'.join(
                        search_string.replace('.', ' ').split()),
                    ('', '&freeleech=on')[self.freeleech])
                for page in range((3, 5)['Cache' == mode])[:-1]:
                    urls[-1] += [search_url + '&page=%s' % page]
            results += self._search_urls(mode, last_recent_search, urls)
            last_recent_search = ''

        return results
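
Editor's note: these providers lean heavily on the `(a, b)[condition]` idiom, which indexes a two-element tuple with a boolean (False is 0, True is 1). A minimal sketch of the equivalence, reusing names from the example above:

    mode = 'Cache'
    page_count = (3, 5)['Cache' == mode]       # True indexes element 1, so 5 here
    page_count = 5 if 'Cache' == mode else 3   # the clearer modern equivalent
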
Example #2
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        last_recent_search = self.last_recent_search
        last_recent_search = '' if not last_recent_search else last_recent_search.replace(
            'id-', '')

        for mode in search_params:
            urls = []
            for search_string in search_params[mode]:
                urls += [[]]
                search_string = unidecode(search_string) or search_string
                for page in range((3, 5)['Cache' == mode])[1:]:
                    # URL with 50 tv-show results, or max 150 if adjusted in IPTorrents profile
                    urls[-1] += [
                        self.urls['search'] %
                        (self._categories_string(mode, '%s', ';'),
                         search_string, (';free', '')[not self.freeleech],
                         (';o=seeders', '')['Cache' == mode], page)
                    ]
            results += self._search_urls(mode, last_recent_search, urls)
            last_recent_search = ''

        return results
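
The `unidecode(search_string) or search_string` guard transliterates Unicode to ASCII and falls back to the original string if transliteration produces an empty result. A minimal sketch:

    from unidecode import unidecode

    title = 'Señorita Pólvora'
    ascii_title = unidecode(title) or title  # 'Senorita Polvora'; original kept only if the result is empty
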
Example #3
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        last_recent_search = self.last_recent_search
        last_recent_search = '' if not last_recent_search else last_recent_search.replace(
            'id-', '')
        for mode in search_params:
            urls = []
            for search_string in search_params[mode]:
                urls += [[]]
                for page in range((3, 5)['Cache' == mode])[1:]:
                    urls[-1] += [
                        self.urls[('search', 'browse')['Cache' == mode]] % {
                            'cats':
                            self._categories_string(mode, '', ','),
                            'query':
                            unidecode(search_string) or search_string,
                            'x':
                            '%spage/%s' % (('facets/tags:FREELEECH/',
                                            '')[not self.freeleech], page)
                        }
                    ]
            results += self._search_urls(mode, last_recent_search, urls)
            last_recent_search = ''

        return results
Example #4
    def _search_provider(self, search_params, **kwargs):

        self._authorised()
        results = []

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        url = self.urls['browse'] % self.passkey
        for mode in search_params:
            for search_string in search_params[mode]:
                search_string = unidecode(search_string)
                search_url = url + (self.urls['search'] % search_string, '')['Cache' == mode]

                xml_data = self.cache.get_rss(search_url)

                cnt = len(items[mode])
                if xml_data and 'entries' in xml_data:
                    for entry in xml_data['entries']:
                        try:
                            if entry['title'] and 'download' in entry['link']:
                                items[mode].append((entry['title'], entry['link'], None, None))
                        except KeyError:
                            continue

                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = list(set(results + items[mode]))

        return results
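
Example 4 deduplicates with `list(set(...))`, which works because every item is a hashable tuple. A minimal sketch with hypothetical items:

    results = [('Show.S01E01', 'http://example/dl/1', None, None)]
    new_items = [('Show.S01E01', 'http://example/dl/1', None, None),
                 ('Show.S01E02', 'http://example/dl/2', None, None)]
    results = list(set(results + new_items))  # duplicate tuples collapse; order is not preserved
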
Example #5
    def _search_provider(self, search_params, **kwargs):

        results = []

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict([(k, re.compile('(?i)' + v))
                   for (k, v) in iteritems({
                       'seed': r'seed[^\d/]+([\d]+)',
                       'leech': r'leech[^\d/]+([\d]+)',
                       'size': r'size[^\d/]+([^/]+)',
                       'get': '(.*download.*)',
                       'title': r'NUKED\b\.(.*)$'
                   })])
        for mode in search_params:
            for search_string in search_params[mode]:
                search_string = unidecode(search_string)
                search_string = search_string.replace(' ', '.')

                search_url = self.urls['search'] % (
                    self.api_key,
                    self._categories_string(mode, template='%s',
                                            delimiter=','), search_string)

                resp = self.get_url(search_url)
                if self.should_skip():
                    return results

                data = feedparser.parse(resp)
                tr = data and data.get('entries', []) or []

                cnt = len(items[mode])
                for item in tr:
                    try:
                        seeders, leechers, size = [
                            try_int(n, n) for n in [
                                rc[x].findall(item.summary)[0].strip()
                                for x in ('seed', 'leech', 'size')
                            ]
                        ]
                        if self._reject_item(seeders, leechers):
                            continue
                        title = rc['title'].sub(r'\1', item.title.strip())
                        download_url = self._link(rc['get'].findall(
                            getattr(item, 'link', ''))[0])
                    except (BaseException, Exception):
                        continue

                    if download_url and title:
                        items[mode].append((title, download_url, seeders,
                                            self._bytesizer(size)))

                time.sleep(1.1)
                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
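
feedparser accepts a URL, a file, or a raw XML string, so the response body fetched with get_url above can be parsed directly. A minimal sketch with hypothetical feed content:

    import feedparser

    xml = ('<rss version="2.0"><channel><item>'
           '<title>Show.S01E01.720p</title>'
           '<link>http://example.com/download/1</link>'
           '<description>seed: 12 / leech: 3 / size: 1.4 GB</description>'
           '</item></channel></rss>')
    feed = feedparser.parse(xml)
    for entry in feed.entries:
        print(entry.title, entry.link, entry.summary)  # description is exposed as .summary
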
Example #6
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({'nodots': r'[\.\s]+'})])
        for mode in search_params:
            for search_string in search_params[mode]:
                search_string = unidecode(search_string)

                search_url = self.urls['browse'] % (self.user_authkey, self.user_passkey)
                if 'Cache' != mode:
                    search_url += self.urls['search'] % rc['nodots'].sub('+', search_string)

                data_json = self.get_url(search_url, parse_json=True)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    for item in data_json.get('response', {}).get('results', []):
                        if self.freeleech and not item.get('isFreeleech'):
                            continue

                        seeders, leechers, group_name, torrent_id, size = [try_int(n, n) for n in [
                            item.get(x) for x in ['seeders', 'leechers', 'groupName', 'torrentId', 'size']]]
                        if self._reject_item(seeders, leechers):
                            continue

                        try:
                            title_parts = group_name.split('[')
                            maybe_res = re.findall(r'((?:72|108|216)0\w)', title_parts[1])
                            maybe_ext = re.findall('(?i)(%s)' % '|'.join(common.mediaExtensions), title_parts[1])
                            detail = title_parts[1].split('/')
                            detail[1] = detail[1].strip().lower().replace('mkv', 'x264')
                            title = '%s.%s' % (BS4Parser(title_parts[0].strip()).soup.string, '.'.join(
                                (maybe_res and [maybe_res[0]] or []) +
                                [detail[0].strip(), detail[1], maybe_ext and maybe_ext[0].lower() or 'mkv']))
                        except (IndexError, KeyError):
                            title = self.regulate_title(item, group_name)
                        download_url = self.urls['get'] % (self.user_authkey, self.user_passkey, torrent_id)

                        if title and download_url:
                            items[mode].append((title, download_url, seeders, self._bytesizer(size)))

                except (BaseException, Exception):
                    logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)
                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
Example #7
def clean_show_name(showname):
    """

    :param showname: show name
    :type showname: AnyStr
    :return:
    :rtype: AnyStr
    """
    if not PY2:
        return re.sub(r'[(\s]*(?:19|20)\d\d[)\s]*$', '', showname)
    return re.sub(r'[(\s]*(?:19|20)\d\d[)\s]*$', '', unidecode(showname))
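
Usage sketch: the regex drops a trailing four-digit year, parenthesised or bare:

    assert clean_show_name('The Expanse (2015)') == 'The Expanse'
    assert clean_show_name('Doctor Who 2005') == 'Doctor Who'
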
Example #8
    def logged_in(self, y):
        if all([
                None is y or 'logout' in y,
                bool(
                    filter_list(lambda c: 'remember_web_' in c,
                                iterkeys(self.session.cookies)))
        ]):
            if None is not y:
                self.shows = dict(
                    re.findall(r'<option value="(\d+)">(.*?)</option>', y))
                for k, v in iteritems(self.shows):
                    self.shows[k] = sanitize_scene_name(
                        html_unescape(unidecode(decode_str(v))))
            return True
        return False
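
The shows mapping in Example 8 comes from a plain findall over option tags; a minimal sketch with hypothetical markup:

    import re

    html = '<option value="101">Señor Avila</option><option value="102">Other Show</option>'
    shows = dict(re.findall(r'<option value="(\d+)">(.*?)</option>', html))
    # {'101': 'Señor Avila', '102': 'Other Show'}
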
Example #9
    def _search_provider(self, search_params, **kwargs):
        results = []
        self.session.headers['Cache-Control'] = 'max-age=0'
        last_recent_search = self.last_recent_search
        last_recent_search = '' if not last_recent_search else last_recent_search.replace(
            'id-', '')
        for mode in search_params:
            urls = []
            for search_string in search_params[mode]:
                urls += [[]]
                search_string = unidecode(search_string)
                search_string = search_string if 'Cache' == mode else search_string.replace(
                    '.', ' ')
                for page in range((3, 5)['Cache' == mode])[1:]:
                    urls[-1] += [self.urls['search'] % (search_string, page)]
            results += self._search_urls(mode, last_recent_search, urls)
            last_recent_search = ''

        return results
Example #10
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        for mode in search_params:
            for search_string in search_params[mode]:
                search_string = unidecode(search_string)
                search_url = self.urls['search'] % search_string

                data_json, sess = self.get_url(search_url, headers=dict(Authorization='Bearer %s' % self._token),
                                               resp_sess=True, parse_json=True)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                if isinstance(data_json, dict):
                    for tr in data_json.get('torrents') or data_json.get('releases') or []:
                        seeders, leechers, size = (try_int(n, n) for n in [
                            tr.get(x) for x in ('seeders', 'leechers', 'size')])
                        if not self._reject_item(seeders, leechers):
                            title = tr.get('releaseName')
                            download_id = tr.get('id') or tr.get('shortId')
                            download_url = download_id and self.urls.get('get') % (download_id, self._dkey)
                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))
                elif 200 != getattr(sess, 'response', {}).get('status_code', 0):
                    logger.log('The site search is not working, skipping')
                    break

                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
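
try_int is a SickGear helper not shown on this page; as used here, `try_int(n, n)` converts to int when possible and otherwise passes the raw value through (e.g. a size string destined for _bytesizer). A hypothetical sketch of that behaviour:

    def try_int(value, default=0):
        # hypothetical reimplementation for illustration; the real helper lives in SickGear
        try:
            return int(value)
        except (TypeError, ValueError):
            return default

    assert try_int('12', '12') == 12
    assert try_int('1.4 GB', '1.4 GB') == '1.4 GB'  # falls through for _bytesizer to handle
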
Example #11
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict([(k, re.compile('(?i)' + v))
                   for (k, v) in iteritems({'get': 'magnet'})])
        urls = []
        for mode in search_params:
            for search_string in search_params[mode]:
                if 'Cache' == mode:
                    search_url = self.urls['browse']
                else:
                    search_string = unidecode(search_string)
                    show_name = filter_list(
                        lambda x: x.lower() == re.sub(r'\s.*', '',
                                                      search_string.lower()),
                        list_values(self.shows))
                    if not show_name:
                        continue
                    search_url = self.urls['search'] % list_keys(
                        self.shows)[list_values(self.shows).index(
                            show_name[0])]

                if search_url in urls:
                    continue
                urls += [search_url]

                html = self.get_url(search_url)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html) as soup:
                        tbl_rows = soup.select('ul.user-timeline > li')

                        if not len(tbl_rows):
                            raise generic.HaltParseException

                        for tr in tbl_rows:
                            try:
                                anchor = tr.find('a', href=rc['get'])
                                title = self.regulate_title(anchor)
                                download_url = self._link(anchor['href'])
                            except (AttributeError, TypeError, ValueError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, None, None))

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)
                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
Example #12
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict([(k, re.compile('(?i)' + v))
                   for (k, v) in iteritems({
                       'info': 'detail',
                       'get': r'download\.'
                   })])

        for mode in search_params:
            for search_string in search_params[mode]:
                search_string = unidecode(search_string)
                search_url = self.urls['search'] % (
                    search_string, self._categories_string(mode, '%s', ','))

                html = self.get_url(search_url, timeout=90)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    html = '<table%s' % re.split(r'</table>\s*<table',
                                                 html)[-1]
                    html = re.sub(r'</td>([^<]*)<tr', r'</td></tr>\1<tr', html)
                    with BS4Parser(html, parse_only='table') as tbl:
                        tbl_rows = [] if not tbl else tbl.find_all('tr')

                        if 2 > len(tbl_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in tbl_rows[1:]:
                            cells = tr.find_all('td')
                            if 6 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(
                                    tr)
                                seeders, leechers, size = [
                                    try_int(n, n) for n in [
                                        cells[head[x]].get_text().strip()
                                        for x in ('seed', 'leech', 'size')
                                    ]
                                ]
                                if self._reject_item(
                                        seeders, leechers,
                                        self.freeleech and (not tr.attrs.get(
                                            'bgcolor',
                                            '').upper().endswith('FF99'))):
                                    continue

                                info = tr.find('a', href=rc['info'])
                                title = (info.attrs.get('title')
                                         or info.get_text()).strip()
                                download_url = self._link(
                                    tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError,
                                    KeyError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)

                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
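
The pre-parse substitutions in Example 12 patch malformed markup before BeautifulSoup sees it; for instance, the second one closes table rows whose `</tr>` is missing. A minimal sketch:

    import re

    html = '<td>1.4 GB</td>\n<tr><td>next row</td></tr>'
    fixed = re.sub(r'</td>([^<]*)<tr', r'</td></tr>\1<tr', html)
    # '<td>1.4 GB</td></tr>\n<tr><td>next row</td></tr>'
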
Example #13
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict([(k, re.compile('(?i)' + v))
                   for (k, v) in iteritems({
                       'info': 'view',
                       'get': 'download',
                       'nodots': r'[\.\s]+'
                   })])
        for mode in search_params:
            for search_string in search_params[mode]:
                search_string = unidecode(search_string)
                search_url = self.urls['search'] % (self._categories_string(
                    mode, 'filter_cat[%s]=1'), rc['nodots'].sub(
                        '+', search_string))
                html = self.get_url(search_url)

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(
                            html) or 'Translation: No search results' in html:
                        raise generic.HaltParseException

                    with BS4Parser(html, features=['html5lib',
                                                   'permissive']) as soup:
                        torrent_table = soup.find(id='torrent_table')
                        torrent_rows = [] if not torrent_table else torrent_table.find_all(
                            'tr')

                        if 2 > len(torrent_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in torrent_rows[1:]:
                            cells = tr.find_all('td')
                            if 5 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(
                                    tr)
                                seeders, leechers, size = [
                                    try_int(n, n) for n in [
                                        cells[head[x]].get_text().strip()
                                        for x in ('seed', 'leech', 'size')
                                    ]
                                ]
                                if self._reject_item(
                                        seeders, leechers, self.freeleech
                                        and (not bool(
                                            re.search(
                                                r'(?i)>\s*Freeleech!*\s*<',
                                                cells[1].encode(
                                                    formatter='minimal'))))):
                                    continue

                                title = self.regulate_title(
                                    tr.find(
                                        'a',
                                        title=rc['info']).get_text().strip())
                                download_url = self._link(
                                    tr.find('a', title=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)
                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
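
Many of these examples map table headers to cell indices via `head = self._header_row(tr)` and then address cells as `cells[head['seed']]`. That helper is not shown on this page; a hypothetical sketch of the idea only:

    def _header_row(tr):
        # hypothetical illustration; the real helper is part of SickGear's base provider class
        labels = [c.get_text().strip().lower() for c in tr.find_all(['th', 'td'])]
        head = {}
        for key in ('seed', 'leech', 'size'):
            for i, label in enumerate(labels):
                if key in label:
                    head[key] = i
                    break
        return head
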
Example #14
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self.url:
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        for mode in search_params:
            for search_string in search_params[mode]:

                search_string = unidecode(search_string)

                search_url = self.urls['browse'] if 'Cache' == mode \
                    else self.urls['search'] % (quote_plus(search_string))

                html = self.get_url(search_url, provider=self)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException
                    with BS4Parser(html,
                                   parse_only=dict(
                                       table={
                                           'class': (lambda at: at and bool(
                                               re.search(r'table[23\d]*', at)))
                                       })) as tbl:
                        tbl_rows = [] if not tbl else tbl.select('tr')
                        for x, tr in enumerate(tbl_rows):
                            row_text = tr.get_text().lower()
                            if not ('torrent' in row_text
                                    and 'size' in row_text):
                                tr.decompose()
                            else:
                                break
                            if 5 < x:
                                break
                        tbl_rows = [] if not tbl else tbl.select('tr')

                        if not len(tbl_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in tbl_rows:
                            cells = tr.find_all('td')
                            if 5 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(
                                    tr)
                                seeders, leechers, size = [
                                    try_int(n.replace(',', ''), n) for n in [
                                        cells[head[x]].get_text().strip()
                                        for x in ('seed', 'leech', 'size')
                                    ]
                                ]
                                if self._reject_item(seeders, leechers):
                                    continue

                                anchors = tr.td.find_all('a')
                                stats = anchors and [
                                    len(a.get_text()) for a in anchors
                                ]
                                anchor = stats and anchors[stats.index(
                                    max(stats))]
                                title = anchor and anchor.get_text().strip()
                                download_url = anchor and self._link(
                                    anchor.get('href'))
                            except (AttributeError, TypeError, ValueError,
                                    IndexError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)

                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
Example #15
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict([(k, re.compile('(?i)' + v))
                   for (k, v) in iteritems({
                       'info': r'.*?details\s*-\s*',
                       'get': 'download'
                   })])
        log = ''
        if self.filter:
            non_marked = 'f0' in self.filter
            # if search_any, use unselected to exclude, else use selected to keep
            filters = ([f for f in self.may_filter if f in self.filter],
                       [f for f in self.may_filter
                        if f not in self.filter])[non_marked]
            filters += ((
                (all([x in filters
                      for x in ('free', 'double')]) and ['freedouble'] or []) +
                (all([x in filters
                      for x in ('half', 'double')]) and ['halfdouble'] or [])
            ), ((not all([x not in filters
                          for x in ('free', 'double')]) and ['freedouble']
                 or []) +
                (not all([x not in filters
                          for x in ('half', 'double')]) and ['halfdouble']
                 or [])))[non_marked]
            rc['filter'] = re.compile('(?i)^(%s)$' % '|'.join([
                '%s' % f for f in filters
                if (f in self.may_filter and self.may_filter[f][1]) or f
            ]))
            log = '%sing (%s) ' % (('keep', 'skipp')[non_marked], ', '.join([
                f in self.may_filter and self.may_filter[f][0] or f
                for f in filters
            ]))
        for mode in search_params:
            if mode in ['Season', 'Episode']:
                show_type = self.show_obj.air_by_date and 'Air By Date' \
                            or self.show_obj.is_sports and 'Sports' or self.show_obj.is_anime and 'Anime' or None
                if show_type:
                    logger.log(
                        u'Provider does not carry shows of type: [%s], skipping'
                        % show_type, logger.DEBUG)
                    return results

            for search_string in search_params[mode]:
                search_string = unidecode(search_string)
                search_url = self.urls['search'] % ('+'.join(
                    search_string.split()), self._categories_string(mode, ''))

                html = self.get_url(search_url)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(
                            html,
                            parse_only=dict(table={
                                'class': (lambda at: at and 'table' in at)
                            })) as tbl:
                        tbl_rows = [] if not tbl else tbl.find_all('tr')

                        if 2 > len(tbl_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in tbl_rows[1:]:
                            cells = tr.find_all('td')
                            if 5 > len(cells) or (self.confirmed and tr.find(
                                    'i', title=re.compile('(?i)unverified'))):
                                continue
                            if any(self.filter):
                                marked = ','.join([
                                    x.attrs.get('title', '').lower()
                                    for x in tr.find_all(
                                        'i',
                                        attrs={
                                            'class': [
                                                'fa-star', 'fa-diamond',
                                                'fa-star-half-o'
                                            ]
                                        })
                                ])
                                munged = ''.join(
                                    filter_iter(marked.__contains__,
                                                ['free', 'half', 'double']))
                                # noinspection PyUnboundLocalVariable
                                if ((non_marked
                                     and rc['filter'].search(munged)) or
                                    (not non_marked
                                     and not rc['filter'].search(munged))):
                                    continue
                            try:
                                head = head if None is not head else self._header_row(
                                    tr)
                                seeders, leechers, size = [
                                    try_int(n, n) for n in [
                                        cells[head[x]].get_text().strip()
                                        for x in ('seed', 'leech', 'size')
                                    ]
                                ]
                                if self._reject_item(seeders, leechers):
                                    continue

                                title = rc['info'].sub(
                                    '',
                                    tr.find('a', attrs={'title':
                                                        rc['info']})['title'])
                                download_url = self._link(
                                    tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError,
                                    IndexError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)

                self._log_search(mode,
                                 len(items[mode]) - cnt, log + search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
Example #16
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({
            'info': 'detail', 'cats': 'cat=(?:%s)' % self._categories_string(template='', delimiter='|'),
            'get': 'download'})])
        for mode in search_params:
            for search_string in search_params[mode]:
                search_string = unidecode(search_string)
                search_url = self.urls['search'] % (self._categories_string(), search_string)

                html = self.get_url(search_url)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, parse_only=dict(table={'id': 'torrentsTable'})) as tbl:
                        tbl_rows = [] if not tbl else tbl.find_all('tr')

                        if 2 > len(tbl_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in tbl_rows[1:]:
                            cells = tr.find_all('td')
                            if 4 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(tr)
                                seeders, leechers, size = [try_int(n, n) for n in [
                                    cells[head[x]].get_text().strip() for x in ('seed', 'leech', 'size')]]
                                if None is tr.find('a', href=rc['cats']) or self._reject_item(seeders, leechers):
                                    continue

                                info = tr.find('a', href=rc['info'])
                                title = (info.attrs.get('title') or info.get_text()).strip()
                                download_url = self._link(tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)

                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
Example #17
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict([(k, re.compile('(?i)' + v))
                   for (k, v) in iteritems({
                       'show_id': r'"show\?id=(\d+)[^>]+>([^<]+)<\/a>',
                       'get': 'load_torrent'
                   })])
        search_types = sorted([x for x in iteritems(search_params)],
                              key=lambda tup: tup[0],
                              reverse=True)
        maybe_only = search_types[0][0]
        show_detail = '_only' in maybe_only and search_params.pop(
            maybe_only)[0] or ''
        for mode in search_params:
            for search_string in search_params[mode]:
                if 'Cache' == mode:
                    search_url = self.urls['browse']
                    html = self.get_url(search_url)
                    if self.should_skip():
                        return results
                else:
                    search_string = unidecode(search_string)
                    search_string = search_string.replace(show_detail,
                                                          '').strip()
                    search_url = self.urls['search'] % search_string
                    html = self.get_url(search_url)
                    if self.should_skip():
                        return results

                    shows = rc['show_id'].findall(html)
                    if any(shows):
                        html = ''
                        for show in set(shows):
                            sid, title = show
                            if title in unquote_plus(search_string):
                                html and time.sleep(1.1)
                                html += self.get_url(self.urls['show'] % sid)
                                if self.should_skip():
                                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html) as tbl:
                        tbl_rows = tbl.tbody and tbl.tbody.find_all(
                            'tr') or tbl.table and tbl.table.find_all('tr')

                        if 2 > len(tbl_rows or []):
                            raise generic.HaltParseException

                        head = None
                        for tr in tbl_rows[0:]:
                            cells = tr.find_all('td')
                            if 4 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(
                                    tr)
                                stats = cells[head['leech']].get_text().strip()
                                seeders, leechers = [
                                    (try_int(x[0], 0), try_int(x[1], 0))
                                    for x in re.findall(
                                        r'(?::(\d+))(?:\W*[/]\W*:(\d+))?',
                                        stats) if x[0]
                                ][0]
                                if self._reject_item(seeders, leechers):
                                    continue
                                sizes = [
                                    (try_int(x[0], x[0]), try_int(x[1], False))
                                    for x in re.findall(
                                        r'([\d.]+\w+)?(?:\s*[(\[](\d+)[)\]])?',
                                        stats) if x[0]
                                ][0]
                                size = sizes[(0, 1)[1 < len(sizes)]]

                                for element in [
                                        x for x in cells[2].contents[::-1]
                                        if text_type(x).strip()
                                ]:
                                    if 'NavigableString' in str(
                                            element.__class__):
                                        title = text_type(element).strip()
                                        break

                                download_url = self._link(
                                    tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)
                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
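
The stats parsing in Example 17 pulls ':seeders / :leechers' pairs out of a single cell; a minimal sketch with a hypothetical cell value:

    import re

    stats = ':12 / :3 1.4GB [728]'  # hypothetical cell text: seeders/leechers, size, snatch count
    pairs = [(int(a), int(b or 0))
             for a, b in re.findall(r'(?::(\d+))(?:\W*[/]\W*:(\d+))?', stats) if a]
    # [(12, 3)]
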
Example #18
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({'info': 'details', 'get': 'download'})])
        log = ''
        if self.filter:
            non_marked = 'f0' in self.filter
            # if search_any, use unselected to exclude, else use selected to keep
            filters = ([f for f in self.may_filter if f in self.filter],
                       [f for f in self.may_filter if f not in self.filter])[non_marked]
            rc['filter'] = re.compile('(?i)(%s).png' % '|'.join(
                [f.replace('f', '') for f in filters if self.may_filter[f][1]]))
            log = '%sing (%s) ' % (('keep', 'skipp')[non_marked], ', '.join([self.may_filter[f][0] for f in filters]))

        for mode in search_params:
            rc['cats'] = re.compile('(?i)category=(?:%s)' % self._categories_string(mode, template='', delimiter='|'))
            for search_string in search_params[mode]:
                search_string = unidecode(search_string)
                search_url = self.urls['search'] % (
                    search_string,
                    self._categories_string(mode, template='category[]=%s')
                        .replace('&category[]=4489', ('&genre[]=Animation', '')[mode in ['Cache', 'Propers']]))
                html = self.get_url(search_url)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    html = re.sub(r'(?ims)<div[^>]+display:\s*none;.*?</div>', '', html)
                    html = re.sub('(?im)href=([^\\"][^>]+)>', r'href="\1">', html)
                    html = (html.replace('"/></td>', '" /></a></td>')
                            .replace('"title="', '" title="')
                            .replace('</u></span></a></td>', '</u></a></span></td>'))
                    html = re.sub('(?im)<b([mtwfs][^>]+)', r'<b>\1</b', html)

                    with BS4Parser(html, attr='width="100%"') as soup:
                        tbl_rows = [tr for tr in ([] if not soup else soup.find_all('tr'))
                                    if tr.find('a', href=rc['info'])]

                        if not len(tbl_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in tbl_rows:
                            cells = tr.find_all('td')
                            # noinspection PyUnboundLocalVariable
                            if (6 > len(cells) or any(self.filter)
                                and ((non_marked and tr.find('img', src=rc['filter']))
                                     or (not non_marked and not tr.find('img', src=rc['filter'])))):
                                continue
                            try:
                                head = head if None is not head else self._header_row(tr)
                                seeders, leechers, size = [try_int(n, n) for n in [
                                    cells[head[x]].get_text().strip() for x in ('seed', 'leech', 'size')]]
                                if not tr.find('a', href=rc['cats']) or self._reject_item(seeders, leechers):
                                    continue
                                title = tr.find('a', href=rc['info']).get_text().strip()
                                download_url = self._link(tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError, IndexError):
                                continue

                            if title and download_url:
                                items[mode].append((title, download_url, seeders, self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)

                self._log_search(mode, len(items[mode]) - cnt, log + search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
Example #19
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict([(k, re.compile('(?i)' + v))
                   for (k, v) in iteritems({
                       'info': 'details',
                       'get': 'download'
                   })])
        for mode in search_params:
            rc['cats'] = re.compile(
                '(?i)cat=(?:%s)' %
                self._categories_string(mode, template='', delimiter='|'))
            for search_string in search_params[mode]:
                search_string = unidecode(search_string)

                html = self.get_url(
                    self.urls['search'] %
                    ('+'.join(search_string.split()),
                     self._categories_string(mode, template='cats[]=%s')))
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html) as soup:
                        tbl_rows = soup.find_all('div', 'torrentrow')

                        if not len(tbl_rows):
                            raise generic.HaltParseException

                        for tr in tbl_rows:
                            cells = tr.select('span[style*="cell"]')
                            if 6 > len(cells):
                                continue
                            try:
                                seeders, leechers, size = [
                                    try_int(n, n) for n in [
                                        cells[x].get_text().strip()
                                        for x in (-3, -2, -5)
                                    ]
                                ]
                                if not tr.find(
                                        'a',
                                        href=rc['cats']) or self._reject_item(
                                            seeders, leechers):
                                    continue

                                title = tr.find(
                                    'a', href=rc['info']).get_text().strip()
                                download_url = self._link(
                                    tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError,
                                    IndexError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)

                self._log_search(mode,
                                 len(items[mode]) - cnt,
                                 self.session.response.get('url'))

            results = self._sort_seeding(mode, results + items[mode])

        return results
Example #20
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict([(k, re.compile('(?i)' + v))
                   for (k, v) in iteritems({
                       'info': 'detail',
                       'get': 'download',
                       'fl': r'\(Freeleech\)'
                   })])
        for mode in search_params:
            for search_string in search_params[mode]:
                search_string = unidecode(search_string)
                search_url = self.urls['search'] % (
                    search_string, self._categories_string(mode),
                    ('3', '0')[not self.freeleech])

                html = self.get_url(search_url, timeout=90)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    html = html.replace(
                        '<table width=100% border=0 align=center cellpadding=0 cellspacing=0>',
                        '')
                    html = re.sub(
                        r'(?s)(.*)(<table[^>]*?950[^>]*>.*)(</body>)', r'\1\3',
                        html)
                    html = re.sub(r'(?s)<table[^>]+font[^>]+>',
                                  '<table id="parse">', html)
                    html = re.sub(
                        r'(?s)(<td[^>]+>(?!<[ab]).*?)(?:(?:</[ab]>)+)', r'\1',
                        html)
                    html = re.sub(r'(?m)^</td></tr></table>', r'', html)
                    with BS4Parser(
                            html,
                            parse_only=dict(table={'id': 'parse'})) as tbl:
                        tbl_rows = [] if not tbl else tbl.find_all('tr')

                        if 2 > len(tbl_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in tbl_rows[1:]:
                            cells = tr.find_all('td')
                            if 5 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(
                                    tr)
                                seeders, leechers, size = [
                                    try_int(n, n) for n in [
                                        cells[head[x]].get_text().strip()
                                        for x in ('seed', 'leech', 'size')
                                    ]
                                ]
                                if self._reject_item(seeders, leechers):
                                    continue

                                info = tr.find('a', href=rc['info'])
                                title = (info.attrs.get('title') or
                                         info.get_text().split()[0]).strip()
                                download_url = self._link(
                                    tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError,
                                    KeyError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)

                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
Example #21
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict([(k, re.compile('(?i)' + v))
                   for (k, v) in iteritems({
                       'info': r'/torrents?/(?P<tid>(?P<tid_num>\d{2,})[^"]*)',
                       'get': 'download'
                   })])
        for mode in search_params:
            for search_string in search_params[mode]:
                search_string = unidecode(unquote_plus(search_string))

                vals = [i for i in range(5, 16)]
                random.SystemRandom().shuffle(vals)
                attempts = html = soup = tbl = None
                fetch = 'failed fetch'
                for attempts, s in enumerate((0, vals[0], vals[5], vals[10])):
                    time.sleep(s)
                    html = self.get_url(self.urls['search'] %
                                        (search_string, self._token))
                    if self.should_skip():
                        return results
                    if html:
                        try:
                            soup = BS4Parser(html).soup
                            tbl = soup.find('table', class_='table')
                            if tbl:
                                fetch = 'data fetched'
                                break
                        except (BaseException, Exception):
                            pass
                if attempts:
                    logger.log('%s %s after %s attempts' %
                               (mode, fetch, attempts + 1))

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html) or not tbl:
                        raise generic.HaltParseException

                    tbl_rows = tbl.find_all('tr')

                    if 2 > len(tbl_rows):
                        raise generic.HaltParseException

                    head = None
                    for tr in tbl_rows[1:]:
                        cells = tr.find_all('td')
                        if 6 > len(cells):
                            continue
                        try:
                            head = head if None is not head else self._header_row(
                                tr)
                            seeders, leechers, size = [
                                try_int(n, n) for n in [
                                    cells[head[x]].get_text().strip()
                                    for x in ('seed', 'leech', 'size')
                                ]
                            ]
                            if self._reject_item(
                                    seeders, leechers, self.freeleech and
                                (None is tr.find('i', class_='fa-star'))):
                                continue

                            title = tr.find(
                                'a', href=rc['info']).get_text().strip()
                            download_url = self._link(
                                tr.find('a', href=rc['get'])['href'])
                        except (BaseException, Exception):
                            continue

                        try:
                            titles = self.regulate_title(
                                title, mode, search_string)
                            if download_url and titles:
                                for title in titles:
                                    items[mode].append(
                                        (title, download_url, seeders,
                                         self._bytesizer(size)))
                        except (BaseException, Exception):
                            pass

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)

                if soup:
                    soup.clear(True)
                    del soup

                self._log_search(mode,
                                 len(items[mode]) - cnt,
                                 ('search string: ' +
                                  search_string.replace('%', '%%'),
                                  self.name)['Cache' == mode])

                if mode in 'Season' and len(items[mode]):
                    break

            results = self._sort_seeding(mode, results + items[mode])

        return results
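
Example no. 21 above wraps its fetch in a retry loop: the first try runs immediately, later tries sleep for a randomized 5-15 seconds drawn from a shuffled range. A self-contained sketch of that pattern, assuming fetch_fn is a callable returning a truthy value on success:

import random
import time

def fetch_with_retries(fetch_fn):
    # up to four tries; sleep 0s first, then three randomized delays
    vals = list(range(5, 16))
    random.SystemRandom().shuffle(vals)
    for attempt, delay in enumerate((0, vals[0], vals[5], vals[10])):
        time.sleep(delay)
        result = fetch_fn()
        if result:
            return attempt + 1, result
    return 4, None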
Example no. 22
    def _search_provider(self, search_params, **kwargs):

        results = []
        if self.show_obj and not self.show_obj.is_anime:
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict([(k, re.compile('(?i)' + v))
                   for (k, v) in iteritems({'nodots': r'[\.\s]+'})])

        for mode in search_params:
            for search_string in search_params[mode]:
                search_string = unidecode(search_string)

                search_url = self.urls['browse'] if 'Cache' == mode else \
                    self.urls['search'] % (rc['nodots'].sub(' ', search_string), str(time.time()).replace('.', '3'))

                data, html = 2 * [None]
                if 'Cache' == mode:
                    data = self.cache.get_rss(search_url)
                else:
                    html = self.get_url(search_url)

                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if None is not data:
                        for cur_item in data.get('entries', []):
                            title, download_url = cur_item.get(
                                'title'), self._link(cur_item.get('link'))
                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, '', ''))
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser('<html><body>%s</body></html>' %
                                   html) as soup:
                        for link in soup.find_all('a'):
                            try:
                                variants = map_list(
                                    lambda t: t.get_text().replace(
                                        'SD', '480p'),
                                    link.find_all('span', class_='badge'))
                                map_consume(
                                    lambda t: t.decompose(),
                                    link.find_all('span') +
                                    link.find_all('div'))
                                title = '[HorribleSubs] ' + re.sub(
                                    r'\s*\[HorribleSubs\]\s*', '',
                                    link.get_text())
                                download_url = self._link(link.get('href'))
                                if title and download_url:
                                    items[mode] += map_list(
                                        lambda _v:
                                        ('%s [%s]' % (title, _v), '%s-%s' %
                                         (download_url, _v), '', ''), variants)
                            except (AttributeError, TypeError, ValueError):
                                continue

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)
                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
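
Example no. 22 expands each scraped anime link into one result per quality badge, mapping 'SD' to '480p' and suffixing both title and download URL. The same expansion in isolation (values are illustrative):

title = '[HorribleSubs] Some Show - 01'
download_url = 'https://example.net/dl/some-show-01'
variants = ['480p', '720p', '1080p']   # badge texts, 'SD' already mapped to '480p'

# one (title, url, seeders, size) tuple per quality variant
items = [('%s [%s]' % (title, v), '%s-%s' % (download_url, v), '', '')
         for v in variants]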
Example no. 23
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict([(k, re.compile('(?i)' + v))
                   for (k, v) in iteritems({
                       'info': 'detail',
                       'get': 'download',
                       'fl': 'free'
                   })])
        for mode in search_params:
            save_url, restore = self._set_categories(mode)
            if self.should_skip():
                return results
            for search_string in search_params[mode]:
                search_string = search_string.replace(u'£', '%')
                search_string = re.sub(r'[\s.]+', '%', search_string)
                search_string = unidecode(search_string)

                kwargs = dict(
                    post_data={
                        'keywords': search_string,
                        'do': 'quick_sort',
                        'page': '0',
                        'category': '0',
                        'search_type': 't_name',
                        'sort': 'added',
                        'order': 'desc',
                        'daysprune': '-1'
                    })

                html = self.get_url(self.urls['search'], **kwargs)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    parse_only = dict(table={
                        'id': (lambda at: at and 'sortabletable' in at)
                    })
                    with BS4Parser(html, parse_only=parse_only) as tbl:
                        tbl_rows = [] if not tbl else tbl.find_all('tr')
                        get_detail = True

                        if 2 > len(tbl_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in tbl_rows[1:]:
                            cells = tr.find_all('td')
                            if 6 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(
                                    tr)
                                seeders, leechers, size = [
                                    try_int(n, n) for n in [
                                        cells[head[x]].get_text().strip()
                                        for x in ('seed', 'leech', 'size')
                                    ]
                                ]
                                if self._reject_item(
                                        seeders, leechers, self.freeleech
                                        and (None is cells[1].find(
                                            'img', title=rc['fl']))):
                                    continue

                                info = tr.find('a', href=rc['info'])
                                title = (tr.find(
                                    'div',
                                    class_='tooltip-content').get_text()
                                         or info.get_text()).strip()
                                title = re.findall('(?m)(^[^\r\n]+)', title)[0]
                                download_url = self._link(
                                    tr.find('a', href=rc['get'])['href'])
                            except (BaseException, Exception):
                                continue

                            if get_detail and title.endswith('...'):
                                try:
                                    with BS4Parser(
                                            self.get_url('%s%s' % (
                                                self.urls[
                                                    'config_provider_home_uri'],
                                                info['href'].lstrip('/').
                                                replace(
                                                    self.urls[
                                                        'config_provider_home_uri'],
                                                    '')))) as soup_detail:
                                        title = soup_detail.find(
                                            'td',
                                            class_='thead',
                                            attrs={
                                                'colspan': '3'
                                            }).get_text().strip()
                                        title = re.findall(
                                            '(?m)(^[^\r\n]+)', title)[0]
                                except IndexError:
                                    continue
                                except (BaseException, Exception):
                                    get_detail = False

                            title = self.regulate_title(title)
                            if download_url and title:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)

                self._log_search(mode,
                                 len(items[mode]) - cnt,
                                 ('search string: ' +
                                  search_string.replace('%', '%%'),
                                  self.name)['Cache' == mode])

                if mode in 'Season' and len(items[mode]):
                    break

            if save_url:
                self.get_url(save_url, post_data=restore)

            results = self._sort_seeding(mode, results + items[mode])

        return results
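
Example no. 23 recovers titles that the listing page truncates with '...': the row's detail page is fetched for the full text, and a hard failure disables further detail fetches so a broken page cannot cost one request per row. A simplified sketch of that control flow (the original additionally skips a row outright on IndexError), assuming rows of (title, fetch_detail) pairs where fetch_detail returns the untruncated title:

def resolve_titles(rows):
    get_detail = True
    for title, fetch_detail in rows:
        if get_detail and title.endswith('...'):
            try:
                title = fetch_detail()
            except (BaseException, Exception):
                get_detail = False   # stop fetching detail pages after a failure
        yield title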
Example no. 24
    def _search_provider(self, search_params, **kwargs):

        results = []

        if self.show_obj and not self.show_obj.is_anime:
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict([(k, re.compile('(?i)' + v))
                   for (k, v) in iteritems({
                       'info': 'view',
                       'get': '(?:torrent|magnet:)'
                   })])
        for mode in search_params:
            for search_string in search_params[mode]:
                search_string = unidecode(search_string)
                search_url = self.urls['search'] % (
                    (0, 2)[self.confirmed], search_string.replace('.', ' '))

                html = self.get_url(search_url)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    parse_only = dict(table={
                        'class': (lambda at: at and 'torrent-list' in at)
                    })
                    with BS4Parser(html, parse_only=parse_only) as tbl:
                        tbl_rows = [] if not tbl else tbl.find_all('tr')

                        if 2 > len(tbl_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in tbl_rows[1:]:
                            cells = tr.find_all('td')
                            if 5 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(
                                    tr)
                                seeders, leechers, size = [
                                    try_int(n, n) for n in [
                                        cells[head[x]].get_text().strip()
                                        for x in ('seed', 'leech', 'size')
                                    ]
                                ]
                                if self._reject_item(seeders, leechers):
                                    continue

                                title = tr.find(
                                    'a', href=rc['info']).get_text().strip()
                                download_url = self._link(
                                    tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError,
                                    IndexError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)

                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
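
Examples no. 23 and no. 24 both index cells through head = self._header_row(tr), computed once per table. A stand-in sketch of what such a helper produces, mapping column names to cell indexes by keyword:

def header_row(header_texts):
    # map 'seed'/'leech'/'size' to column indexes by substring match
    head = {}
    for idx, text in enumerate(t.lower() for t in header_texts):
        for key in ('seed', 'leech', 'size'):
            if key not in head and key in text:
                head[key] = idx
    return head

assert header_row(['Name', 'Size', 'Seeders', 'Leechers']) == \
    {'size': 1, 'seed': 2, 'leech': 3}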
Example no. 25
    def _search_provider(self, search_params, **kwargs):

        results = []

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict([(k, re.compile('(?i)' + v))
                   for (k, v) in iteritems({
                       'abd': r'(\d{4}(?:[.]\d{2}){2})',
                       'peers': r'Seed[^\d]*(\d+)[\w\W]*?Leech[^\d]*(\d+)',
                       'info': r'(\w+)[.]html',
                       'get': r'^magnet:'
                   })])
        for mode in search_params:
            for search_string in search_params[mode]:
                search_string = unidecode(search_string)
                search_string = '+'.join(rc['abd'].sub(r'%22\1%22',
                                                       search_string).split())
                search_url = self.urls['search'] % (
                    search_string,
                    self._categories_string(mode, '', ',') + ' %2Blang%3Aen',
                    ('ns', 'dt')['Cache' == mode])

                html = self.get_url(search_url)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException
                    html = html.replace('</a> </i>', '</a>').replace(
                        '"href=', '" href=').replace('"style', '" style')
                    parse_only = dict(table={
                        'class': (lambda at: at and 'table-torrents' in at)
                    })
                    with BS4Parser(html, parse_only=parse_only) as tbl:
                        tbl_rows = [] if not tbl else tbl.find_all('tr')

                        if 2 > len(tbl_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in tbl_rows[1:]:
                            cells = tr.find_all('td')
                            if 4 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(
                                    tr, {
                                        'peers': r'(?:zqf\-clou)',
                                        'size': r'(?:zqf\-file)',
                                        'down': r'(?:zqf\-down)'
                                    })
                                stats = rc['peers'].findall(
                                    (cells[head['peers']].find(
                                        class_='progress')
                                     or {}).get('title', ''))
                                seeders, leechers = any(stats) and [
                                    try_int(x) for x in stats[0]
                                ] or (0, 0)
                                if self._reject_item(seeders, leechers):
                                    continue
                                for cell in (1, 0):
                                    info = cells[cell].find('a')
                                    if ''.join(re.findall(r'[a-z0-9]+', info.get_text().lower())) in \
                                            re.sub(r'html\?.*', '', ''.join(
                                                re.findall(r'[a-z0-9?]+', info['href'].lower()))):
                                        break
                                else:
                                    info = cells[1].find(
                                        'a', href=rc['info']) or cells[0].find(
                                            'a', href=rc['info'])
                                title = info.get_text().strip()
                                size = cells[head['size']].get_text().strip()
                                download_url = cells[head['down']].find(
                                    'a', href=rc['get'])['href']
                            except (AttributeError, TypeError, ValueError,
                                    IndexError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)

                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
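
Example no. 25 cannot read seeders and leechers from their own cells; both live in the title attribute of a progress bar, so one regex pulls out the pair. The same parse in isolation (the sample title text is illustrative):

import re

peers_rc = re.compile(r'(?i)Seed[^\d]*(\d+)[\w\W]*?Leech[^\d]*(\d+)')
stats = peers_rc.findall('Seeders: 42 | Leechers: 7')
seeders, leechers = any(stats) and [int(x) for x in stats[0]] or (0, 0)
assert (seeders, leechers) == (42, 7)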
Example no. 26
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self.url:
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        quote_fx = (lambda t: quote(t, safe='~()*!.\''))
        for mode in search_params:
            for search_string in search_params[mode]:
                search_url = self.url
                cnt = len(items[mode])
                try:
                    for token in self._get_tokens():
                        if self.should_skip():
                            return results
                        if not token:
                            continue

                        params = dict(token=token[0], ent=token[1])
                        if 'Cache' != mode:
                            params.update(
                                {'ss': quote_fx(unidecode(search_string))})

                        data_json = None
                        vals = [i for i in range(3, 8)]
                        random.SystemRandom().shuffle(vals)
                        for x in vals[0], vals[2], vals[4]:
                            time.sleep(x)
                            params.update(dict(ts=self.ts()))
                            search_url = self.urls[
                                ('search', 'browse')['Cache' == mode]] % params
                            # decode the JSON below because the raw get response can
                            # read as a false negative ('no data') when a search returns nothing
                            html_json = self.get_url(search_url)
                            if None is not html_json:
                                data_json = json.loads(html_json)
                                if data_json or 'Cache' != mode:
                                    break
                            if self.should_skip():
                                return results

                        for item in filter_iter(
                                lambda di: re.match(
                                    '(?i).*?(tv|television)',
                                    di.get('type', '') or di.get(
                                        'category', '')) and
                            (not self.confirmed or di.get('trusted') or di.get(
                                'verified')), data_json or {}):
                            seeders, leechers, size = map_list(
                                lambda arg: try_int(*([
                                    item.get(arg[0]) if None is not item.get(
                                        arg[0]) else item.get(arg[1])
                                ]) * 2),
                                (('seeder', 'seed'), ('leecher', 'leech'),
                                 ('size', 'size')))
                            if self._reject_item(seeders, leechers):
                                continue
                            title = item.get('name') or item.get('title')
                            download_url = item.get('magnet') or item.get(
                                'magnetLink')
                            if not download_url:
                                source = item.get('site') or item.get('source')
                                link = self._link(
                                    item.get('url') or item.get('pageLink'))
                                if not source or not link:
                                    continue
                                download_url = self.urls['get'] % dict(
                                    token=token[0],
                                    src=quote_fx(source),
                                    url=b64encodestring(quote_fx(link)),
                                    ts='%(ts)s')
                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders, size))

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)

                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
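
Example no. 26 builds a fallback download URL when an item has no magnet link, and passes ts='%(ts)s' so the timestamp placeholder survives the first substitution and can be filled in at request time. A sketch with the stdlib standing in for the helpers (the URL template is hypothetical; b64encodestring is approximated with base64):

import base64
from urllib.parse import quote

quote_fx = lambda t: quote(t, safe="~()*!.'")

template = 'https://example.net/get?token=%(token)s&src=%(src)s&url=%(url)s&ts=%(ts)s'
download_url = template % dict(
    token='abc123',
    src=quote_fx('example-tracker'),
    url=base64.b64encode(quote_fx('https://example.org/item/1').encode()).decode(),
    ts='%(ts)s')   # placeholder kept intact for a later, per-request substitution
assert download_url.endswith('ts=%(ts)s')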
Example no. 27
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict([(k, re.compile('(?i)' + v))
                   for (k, v) in iteritems({
                       'info': 'detail',
                       'get': 'download',
                       'nuked': 'nuke',
                       'filter': 'free'
                   })])
        for mode in search_params:
            for search_string in search_params[mode]:
                search_string = unidecode(search_string)
                search_url = self.urls['search'] % (
                    search_string, self._categories_string(mode, '%s', ','))

                html = self.get_url(search_url, timeout=90)
                if self.should_skip():
                    return results

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        raise generic.HaltParseException

                    with BS4Parser(html, attr='cellpadding="5"') as soup:
                        tbl = soup.find('table', class_='browse')
                        tbl_rows = [] if not tbl else tbl.find_all('tr')

                        if 2 > len(tbl_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in tbl_rows[1:]:
                            cells = tr.find_all('td')
                            if 5 > len(cells):
                                continue
                            try:
                                info = tr.find('a', href=rc['info'])
                                head = head if None is not head else self._header_row(
                                    tr)
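                                # the seed and leech counts share one cell on
                                # this site, so the 'leech' column is read
                                # twice and the pair is split by regex below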
                                seeders, leechers, size = [
                                    n for n in [
                                        cells[head[x]].get_text().strip()
                                        for x in ('leech', 'leech', 'size')
                                    ]
                                ]
                                seeders, leechers, size = [
                                    try_int(n, n) for n in list(
                                        re.findall(r'^(\d+)[^\d]+?(\d+)',
                                                   leechers)[0]) +
                                    re.findall('^[^\n\t]+', size)
                                ]
                                if self._reject_item(
                                        seeders, leechers, self.freeleech and
                                    (not tr.find('a', class_=rc['filter'])),
                                        self.confirmed and (any([
                                            tr.find('img', alt=rc['nuked']),
                                            tr.find('img', class_=rc['nuked'])
                                        ]))):
                                    continue

                                title = (info.attrs.get('title')
                                         or info.get_text()).strip()
                                download_url = self._link(
                                    tr.find('a', href=rc['get'])['href'])
                            except (AttributeError, TypeError, ValueError,
                                    KeyError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)

                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
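
The combined-cell split used in Example no. 27, in isolation (the sample text is illustrative):

import re

cell_text = '12 / 3'   # one cell carrying both peer counts
seeders, leechers = [
    int(n) for n in re.findall(r'^(\d+)[^\d]+?(\d+)', cell_text)[0]]
assert (seeders, leechers) == (12, 3)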
Example no. 28
    def _search_provider(self, search_params, **kwargs):

        results = []
        if not self._authorised():
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict([(k, re.compile('(?i)' + v))
                   for (k,
                        v) in iteritems({
                            'info': 'torrents',
                            'get': '(.*?download)(?:_check)?(.*)'
                        })])
        log = ''
        if self.filter:
            non_marked = 'f0' in self.filter
            # if search_any, use unselected to exclude, else use selected to keep
            filters = ([f for f in self.may_filter if f in self.filter],
                       [f for f in self.may_filter
                        if f not in self.filter])[non_marked]
            filters += ((
                (all([x in filters for x in ('free', 'double', 'feat')])
                 and ['freedoublefeat'] or []) +
                (all([x in filters
                      for x in ('free', 'double')]) and ['freedouble'] or []) +
                (all([x in filters
                      for x in ('feat', 'double')]) and ['featdouble'] or [])
            ), ((
                not all([x not in filters for x in ('free', 'double', 'feat')])
                and ['freedoublefeat'] or []) +
                (not all([x not in filters
                          for x in ('free', 'double')]) and ['freedouble']
                 or []) +
                (not all([x not in filters
                          for x in ('feat', 'double')]) and ['featdouble']
                 or [])))[non_marked]
            rc['filter'] = re.compile(r'(?i)^(%s)$' % '|'.join([
                '%s' % f for f in filters
                if (f in self.may_filter and self.may_filter[f][1]) or f
            ]))
            log = '%sing (%s) ' % (('keep', 'skipp')[non_marked], ', '.join([
                f in self.may_filter and self.may_filter[f][0] or f
                for f in filters
            ]))
        for mode in search_params:
            if mode in ['Season', 'Episode']:
                show_type = self.show_obj.air_by_date and 'Air By Date' \
                            or self.show_obj.is_sports and 'Sports' or None
                if show_type:
                    logger.log(
                        u'Provider does not carry shows of type: [%s], skipping'
                        % show_type, logger.DEBUG)
                    return results

            for search_string in search_params[mode]:
                search_string = unidecode(search_string)
                search_url = self.urls['search'] % (
                    self.token, search_string.replace('.', ' '),
                    self._categories_string(template=''), '', '', '')

                resp = self.get_url(search_url)
                if self.should_skip():
                    return results

                resp_json = None
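                # self.resp, when set, holds an HTML table template; the
                # endpoint then returns JSON rows spliced into it further below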
                if None is not self.resp:
                    try:
                        resp_json = json.loads(resp)
                    except (BaseException, Exception):
                        pass

                cnt = len(items[mode])
                try:
                    if not resp or (resp_json and not resp_json.get('rows')):
                        raise generic.HaltParseException

                    html = '<html><body>%s</body></html>' % \
                           (resp if None is self.resp else
                            self.resp.replace('</tbody>', '%s</tbody>' % ''.join(resp_json.get('result', []))))
                    with BS4Parser(
                            html,
                            parse_only=dict(table={
                                'class': (lambda at: at and 'table' in at)
                            })) as tbl:
                        tbl_rows = [] if not tbl else tbl.find_all('tr')

                        if 2 > len(tbl_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in tbl_rows[1:]:
                            cells = tr.find_all('td')
                            if 5 > len(cells):
                                continue
                            if any(self.filter):
                                marked = ','.join([
                                    x.attrs.get('data-original-title',
                                                '').lower()
                                    for x in tr.find_all(
                                        'i',
                                        attrs={
                                            'class': [
                                                'text-gold', 'fa-diamond',
                                                'fa-certificate'
                                            ]
                                        })
                                ])
                                # noinspection PyTypeChecker
                                munged = ''.join(
                                    filter_iter(marked.__contains__,
                                                ['free', 'double', 'feat']))
                                # noinspection PyUnboundLocalVariable
                                if ((non_marked
                                     and rc['filter'].search(munged)) or
                                    (not non_marked
                                     and not rc['filter'].search(munged))):
                                    continue
                            try:
                                head = head if None is not head else self._header_row(
                                    tr, {
                                        'seed': r'circle-up',
                                        'leech': r'circle-down',
                                        'size': r'fa-file'
                                    })
                                seeders, leechers, size = [
                                    try_int(n, n) for n in [
                                        cells[head[x]].get_text().strip()
                                        for x in ('seed', 'leech', 'size')
                                    ]
                                ]
                                if self._reject_item(seeders, leechers):
                                    continue

                                title = tr.find('a', href=rc['info'])
                                title = title.get_text().strip(
                                ) if None is self.resp else title[
                                    'data-original-title']
                                download_url = self._link(''.join(
                                    rc['get'].findall(
                                        tr.find('a',
                                                href=rc['get'])['href'])[0]))
                            except (AttributeError, TypeError, ValueError,
                                    IndexError):
                                continue

                            if title and download_url:
                                items[mode].append(
                                    (title, download_url, seeders,
                                     self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(
                        u'Failed to parse. Traceback: %s' %
                        traceback.format_exc(), logger.ERROR)

                self._log_search(mode,
                                 len(items[mode]) - cnt, log + search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
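
Example no. 28's filter setup is the densest block in the collection: rows are tagged free/double/feat, and an 'f0' entry in the user's filter flips the rule from keep-only-matching to skip-matching. A simplified sketch of the core decision, ignoring the combined-tag variants ('freedouble' and friends); names and the sample selection are illustrative:

import re

may_filter = {'free': ('Freeleech', True), 'double': ('2x upload', True),
              'feat': ('Featured', True)}
user_filter = ['free', 'f0']        # 'f0' means: exclude marked rows
non_marked = 'f0' in user_filter
filters = ([f for f in may_filter if f in user_filter],
           [f for f in may_filter if f not in user_filter])[non_marked]
rc_filter = re.compile(r'(?i)^(%s)$' % '|'.join(filters))

def keep_row(marked):               # marked: tag scraped from the row
    hit = bool(rc_filter.search(marked))
    return not hit if non_marked else hit

assert keep_row('free')             # selected tag survives the exclusion list
assert not keep_row('double')       # unselected tag is skipped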
Example no. 29
    def _search_provider(self, search_params, search_mode='eponly', epcount=0, **kwargs):

        results = []
        if not self.url:
            return results

        items = {'Cache': [], 'Season': [], 'Episode': [], 'Propers': []}

        rc = dict([(k, re.compile('(?i)' + v)) for (k, v) in iteritems({
            'info': 'detail', 'get': 'download[^"]+magnet', 'tid': r'.*/(\d{5,}).*',
            'verify': '(?:helper|moderator|trusted|vip)', 'size': r'size[^\d]+(\d+(?:[.,]\d+)?\W*[bkmgt]\w+)'})])

        for mode in search_params:
            for search_string in search_params[mode]:
                search_string = unidecode(search_string)

                s_mode = 'browse' if 'Cache' == mode else 'search'
                for i in ('', '2'):
                    search_url = self.urls['%s%s' % (s_mode, i)]
                    if 'Cache' != mode:
                        search_url = search_url % quote(search_string)

                    html = self.get_url(search_url)
                    if self.should_skip():
                        return results

                    if html and not self._has_no_results(html):
                        break

                cnt = len(items[mode])
                try:
                    if not html or self._has_no_results(html):
                        self._url = None
                        raise generic.HaltParseException

                    with BS4Parser(html, parse_only=dict(table={'id': 'searchResult'})) as tbl:
                        tbl_rows = [] if not tbl else tbl.find_all('tr')

                        if 2 > len(tbl_rows):
                            raise generic.HaltParseException

                        head = None
                        for tr in tbl_rows[1:]:
                            cells = tr.find_all('td')
                            if 3 > len(cells):
                                continue
                            try:
                                head = head if None is not head else self._header_row(tr)
                                seeders, leechers = [try_int(cells[head[x]].get_text().strip())
                                                     for x in ('seed', 'leech')]
                                if self._reject_item(seeders, leechers):
                                    continue

                                info = tr.find('a', title=rc['info'])
                                title = info.get_text().strip().replace('_', '.')
                                tid = rc['tid'].sub(r'\1', str(info['href']))
                                download_magnet = tr.find('a', title=rc['get'])['href']
                            except (AttributeError, TypeError, ValueError):
                                continue

                            if self.confirmed and not tr.find('img', title=rc['verify']):
                                logger.log(u'Skipping untrusted non-verified result: ' + title, logger.DEBUG)
                                continue

                            # Check number video files = episode in season and
                            # find the real Quality for full season torrent analyzing files in torrent
                            if 'Season' == mode and 'sponly' == search_mode:
                                ep_number = int(epcount // len(set(show_name_helpers.allPossibleShowNames(
                                    self.show_obj))))
                                title = self._find_season_quality(title, tid, ep_number)

                            if title and download_magnet:
                                size = None
                                try:
                                    size = rc['size'].findall(tr.find_all(class_='detDesc')[0].get_text())[0]
                                except (BaseException, Exception):
                                    pass

                                items[mode].append((title, download_magnet, seeders, self._bytesizer(size)))

                except generic.HaltParseException:
                    pass
                except (BaseException, Exception):
                    logger.log(u'Failed to parse. Traceback: %s' % traceback.format_exc(), logger.ERROR)
                self._log_search(mode, len(items[mode]) - cnt, search_url)

            results = self._sort_seeding(mode, results + items[mode])

        return results
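
Example no. 29 probes a second URL variant when the first returns nothing: keys 'search'/'search2' (or 'browse'/'browse2' for cache runs) are tried in order and the first page with results wins. The selection logic in isolation, assuming a urls dict plus fetch and has_results callables:

def first_good_page(urls, mode, fetch, has_results):
    s_mode = 'browse' if 'Cache' == mode else 'search'
    html = None
    for i in ('', '2'):
        html = fetch(urls['%s%s' % (s_mode, i)])
        if html and has_results(html):
            break
    return html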
Example no. 30
    def html(self, mode, search_string, results):

        if 'Content-Type' in self.session.headers:
            del (self.session.headers['Content-Type'])
        setattr(
            self.session, 'reserved', {
                'headers': {
                    'Accept': 'text/html, application/xhtml+xml, */*',
                    'Accept-Language': 'en-GB',
                    'Cache-Control': 'no-cache',
                    'Referer': 'https://broadcasthe.net/login.php',
                    'User-Agent': self.ua
                }
            })
        self.headers = None

        if self.auth_html or self._authorised_html():
            del (self.session.reserved['headers']['Referer'])
            if 'Referer' in self.session.headers:
                del (self.session.headers['Referer'])
            self.auth_html = True

            search_string = unidecode(search_string)
            search_url = self.urls['search'] % (search_string,
                                                self._categories_string(
                                                    mode, 'filter_cat[%s]=1'))

            html = self.get_url(search_url, use_tmr_limit=False)
            if self.should_skip(log_warning=False, use_tmr_limit=False):
                return results

            cnt = len(results)
            try:
                if not html or self._has_no_results(html):
                    raise generic.HaltParseException

                with BS4Parser(html) as soup:
                    tbl = soup.find(id='torrent_table')
                    tbl_rows = [] if not tbl else tbl.find_all('tr')

                    if 2 > len(tbl_rows):
                        raise generic.HaltParseException

                    rc = dict([
                        (k, re.compile('(?i)' + v)) for (k, v) in iteritems({
                            'cats':
                            r'cat\[(?:%s)\]' % self._categories_string(
                                mode, template='', delimiter='|'),
                            'get':
                            'download'
                        })
                    ])

                    head = None
                    for tr in tbl_rows[1:]:
                        cells = tr.find_all('td')
                        if 5 > len(cells):
                            continue
                        try:
                            head = head if None is not head else self._header_row(
                                tr)
                            seeders, leechers, size = [
                                try_int(n, n) for n in [
                                    cells[head[x]].get_text().strip()
                                    for x in ('seed', 'leech', 'size')
                                ]
                            ]
                            if not tr.find(
                                    'a', href=rc['cats']) or self._reject_item(
                                        seeders,
                                        leechers,
                                        container=self.reject_m2ts and
                                        (re.search(r'(?i)\[.*?m2?ts.*?\]',
                                                   tr.get_text('',
                                                               strip=True)))):
                                continue

                            title = tr.select('td span[title]')[0].attrs.get(
                                'title').strip()
                            download_url = self._link(
                                tr.find('a', href=rc['get'])['href'])
                        except (AttributeError, TypeError, ValueError,
                                KeyError, IndexError):
                            continue

                        if title and download_url:
                            results.append((title, download_url, seeders,
                                            self._bytesizer(size)))

            except generic.HaltParseException:
                pass
            except (BaseException, Exception):
                logger.log(
                    u'Failed to parse. Traceback: %s' % traceback.format_exc(),
                    logger.ERROR)

            self._log_search(mode, len(results) - cnt, search_url)

            results = self._sort_seeding(mode, results)

        return results
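
Example no. 30 parks a browser-like header set on the session for the login round-trip, then drops the Referer once authorised. The same handshake with a plain requests.Session standing in for the provider session:

import requests

session = requests.Session()
session.headers.pop('Content-Type', None)    # let requests set it per request
session.headers.update({
    'Accept': 'text/html, application/xhtml+xml, */*',
    'Accept-Language': 'en-GB',
    'Cache-Control': 'no-cache',
    'Referer': 'https://broadcasthe.net/login.php',
})
# ... perform the login request here ...
session.headers.pop('Referer', None)         # dropped once the session is authorised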