Example #1
def extract_torrents(provider, client):
    """ Main torrent extraction generator for non-API based providers

    Args:
        provider  (str): Provider ID
        client (Client): Client class instance

    Yields:
        tuple: A torrent result
    """
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))
    log.debug("[%s] Extracting torrents from %s using definitions: %s" %
              (provider, provider, repr(definition)))

    if not client.content:
        if get_setting("use_debug_parser", bool):
            log.debug("[%s] Parser debug | Page content is empty" % provider)

        return  # PEP 479: raising StopIteration inside a generator is an error on Python 3.7+

    dom = Html().feed(client.content)

    key_search = get_search_query(definition, "key")
    row_search = get_search_query(definition, "row")
    name_search = get_search_query(definition, "name")
    torrent_search = get_search_query(definition, "torrent")
    info_hash_search = get_search_query(definition, "infohash")
    size_search = get_search_query(definition, "size")
    seeds_search = get_search_query(definition, "seeds")
    peers_search = get_search_query(definition, "peers")
    referer_search = get_search_query(definition, "referer")

    log.debug("[%s] Parser: %s" % (provider, repr(definition['parser'])))

    q = Queue()
    threads = []
    needs_subpage = 'subpage' in definition and definition['subpage']

    if needs_subpage:

        def extract_subpage(q, name, torrent, size, seeds, peers, info_hash,
                            referer):
            try:
                log.debug("[%s] Getting subpage at %s" %
                          (provider, repr(torrent)))
            except Exception as e:
                import traceback
                log.error("[%s] Subpage logging failed with: %s" %
                          (provider, repr(e)))
                # map() is lazy on Python 3, so nothing would be logged; loop instead
                for line in traceback.format_exc().split("\n"):
                    log.debug(line)

            # New client instance, otherwise it's race conditions all over the place
            subclient = Client()
            subclient.passkey = client.passkey
            headers = {}

            if "subpage_mode" in definition:
                if definition["subpage_mode"] == "xhr":
                    headers['X-Requested-With'] = 'XMLHttpRequest'
                    headers['Content-Language'] = ''

            if referer:
                headers['Referer'] = referer

            uri = torrent.split('|')  # Split cookies for private trackers
            subclient.open(uri[0].encode('utf-8'), headers=headers)

            if 'bittorrent' in subclient.headers.get('content-type', ''):
                log.debug('[%s] bittorrent content-type for %s' %
                          (provider, repr(torrent)))
                if len(uri) > 1:  # Stick back cookies if needed
                    torrent = '%s|%s' % (torrent, uri[1])
            else:
                try:
                    torrent = extract_from_page(provider, subclient.content)
                    if torrent and not torrent.startswith('magnet') and len(
                            uri) > 1:  # Stick back cookies if needed
                        torrent = '%s|%s' % (torrent, uri[1])
                except Exception as e:
                    import traceback
                    log.error(
                        "[%s] Subpage extraction for %s failed with: %s" %
                        (provider, repr(uri[0]), repr(e)))
                    for line in traceback.format_exc().split("\n"):
                        log.debug(line)

            ret = (name, info_hash, torrent, size, seeds, peers)
            q.put_nowait(ret)

    if not dom:
        if get_setting("use_debug_parser", bool):
            log.debug(
                "[%s] Parser debug | Could not parse DOM from page content" %
                provider)

        return

    if get_setting("use_debug_parser", bool):
        log.debug(
            "[%s] Parser debug | Page content: %s" %
            (provider, client.content.replace('\r', '').replace('\n', '')))

    key = eval(key_search) if key_search else ""
    if key_search and get_setting("use_debug_parser", bool):
        key_str = str(key)
        log.debug(
            "[%s] Parser debug | Matched '%s' iteration for query '%s': %s" %
            (provider, 'key', key_search, key_str.replace('\r', '').replace(
                '\n', '')))

    items = eval(row_search)
    if get_setting("use_debug_parser", bool):
        log.debug("[%s] Parser debug | Matched %d items for '%s' query '%s'" %
                  (provider, len(items), 'row', row_search))

    for item in items:
        if get_setting("use_debug_parser", bool):
            item_str = str(item)
            log.debug(
                "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                % (provider, 'row', row_search, item_str.replace(
                    '\r', '').replace('\n', '')))

        if not item:
            continue

        try:
            name = eval(name_search) if name_search else ""
            torrent = eval(torrent_search) if torrent_search else ""
            size = eval(size_search) if size_search else ""
            seeds = eval(seeds_search) if seeds_search else ""
            peers = eval(peers_search) if peers_search else ""
            info_hash = eval(info_hash_search) if info_hash_search else ""
            referer = eval(referer_search) if referer_search else ""

            if 'magnet:?' in torrent:
                torrent = torrent[torrent.find('magnet:?'):]

            if get_setting("use_debug_parser", bool):
                log.debug(
                    "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                    % (provider, 'name', name_search, name))
                log.debug(
                    "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                    % (provider, 'torrent', torrent_search, torrent))
                log.debug(
                    "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                    % (provider, 'size', size_search, size))
                log.debug(
                    "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                    % (provider, 'seeds', seeds_search, seeds))
                log.debug(
                    "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                    % (provider, 'peers', peers_search, peers))
                if info_hash_search:
                    log.debug(
                        "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                        % (provider, 'info_hash', info_hash_search, info_hash))
                if referer_search:
                    log.debug(
                        "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                        % (provider, 'referer', referer_search, referer))

            # Pass client cookies with torrent if private
            if not torrent.startswith('magnet'):
                user_agent = USER_AGENT

                if client.passkey:
                    torrent = torrent.replace('PASSKEY', client.passkey)
                elif client.token:
                    headers = {
                        'Authorization': client.token,
                        'User-Agent': user_agent
                    }
                    log.debug("[%s] Appending headers: %s" %
                              (provider, repr(headers)))
                    torrent = append_headers(torrent, headers)
                    log.debug("[%s] Torrent with headers: %s" %
                              (provider, repr(torrent)))
                else:
                    parsed_url = urlparse(torrent.split('|')[0])
                    cookie_domain = '{uri.netloc}'.format(uri=parsed_url)
                    cookie_domain = re.sub(r'www\d*\.', '', cookie_domain)
                    cookies = []
                    for cookie in client._cookies:
                        if cookie_domain in cookie.domain:
                            cookies.append(cookie)
                    headers = {}
                    if cookies:
                        headers = {'User-Agent': user_agent}
                        log.debug("[%s] Cookies res: %s / %s" %
                                  (provider, repr(headers),
                                   repr(client.request_headers)))
                        if client.request_headers:
                            headers.update(client.request_headers)
                        if client.url:
                            headers['Referer'] = client.url
                            headers['Origin'] = client.url
                        # Need to set Cookie afterwards to avoid rewriting it with session Cookies
                        headers['Cookie'] = ";".join(
                            ["%s=%s" % (c.name, c.value) for c in cookies])
                    else:
                        headers = {'User-Agent': user_agent}

                    torrent = append_headers(torrent, headers)

            if name and torrent and needs_subpage and not torrent.startswith(
                    'magnet'):
                if not torrent.startswith('http'):
                    torrent = definition['root_url'] + torrent.encode('utf-8')
                t = Thread(target=extract_subpage,
                           args=(q, name, torrent, size, seeds, peers,
                                 info_hash, referer))
                threads.append(t)
            else:
                yield (name, info_hash, torrent, size, seeds, peers)
        except Exception as e:
            log.error("[%s] Got an exception while parsing results: %s" %
                      (provider, repr(e)))

    if needs_subpage:
        log.debug("[%s] Starting subpage threads..." % provider)
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        for i in range(q.qsize()):
            ret = q.get_nowait()
            log.debug("[%s] Queue %d got: %s" % (provider, i, repr(ret)))
            yield ret
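
For orientation, here is a minimal consumption sketch (the provider id, search URL, and Client wiring are placeholders, not taken from the source): extract_torrents is a generator, so results stream out as rows are parsed and, for subpage providers, as the worker threads drain the queue.

client = Client()
client.open("https://example-tracker.org/search?q=ubuntu")  # hypothetical search URL
for name, info_hash, torrent, size, seeds, peers in extract_torrents("example_provider", client):
    log.info("%s | %s seeds / %s peers | %s" % (name, seeds, peers, size))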
Example #2
def extract_torrents(provider, client):
    """ Main torrent extraction generator for non-API based providers

    Args:
        provider  (str): Provider ID
        client (Client): Client class instance

    Yields:
        tuple: A torrent result
    """
    definition = definitions[provider]
    log.debug("Extracting torrents from %s using definitions: %s" %
              (provider, repr(definition)))

    if not client.content:
        return

    dom = Html().feed(client.content)

    row_search = get_parser(definition, "row")
    name_search = get_parser(definition, "name")
    torrent_search = get_parser(definition, "torrent")
    info_hash_search = get_parser(definition, "infohash")
    size_search = get_parser(definition, "size")
    seeds_search = get_parser(definition, "seeds")
    peers_search = get_parser(definition, "peers")
    referer_search = get_parser(definition, "referer")

    log.debug("[%s] Parser: %s" % (provider, repr(definition['parser'])))

    q = Queue()
    threads = []
    needs_subpage = 'subpage' in definition and definition['subpage']

    if needs_subpage:

        def extract_subpage(q, name, torrent, size, seeds, peers, info_hash,
                            referer):
            try:
                log.debug("[%s] Getting subpage at %s" %
                          (provider, repr(torrent)))
            except Exception as e:
                import traceback
                log.error("[%s] Subpage logging failed with: %s" %
                          (provider, repr(e)))
                for line in traceback.format_exc().split("\n"):
                    log.debug(line)

            # New client instance, otherwise it's race conditions all over the place
            subclient = Client()
            subclient.passkey = client.passkey

            headers = {}
            if "subpage_mode" in definition:
                if definition["subpage_mode"] == "xhr":
                    headers['X-Requested-With'] = 'XMLHttpRequest'
                    headers['Content-Language'] = ''
            if referer:
                headers['Referer'] = referer
            subclient.headers = headers

            uri = torrent.split('|')  # Split cookies for private trackers
            subclient.open(uri[0].encode('utf-8'))

            if 'bittorrent' in subclient.headers.get('content-type', ''):
                log.debug('[%s] bittorrent content-type for %s' %
                          (provider, repr(torrent)))
                if len(uri) > 1:  # Stick back cookies if needed
                    torrent = '%s|%s' % (torrent, uri[1])
            else:
                try:
                    torrent = extract_from_page(provider, subclient.content)
                    if torrent and not torrent.startswith('magnet') and len(
                            uri) > 1:  # Stick back cookies if needed
                        torrent = '%s|%s' % (torrent, uri[1])
                except Exception as e:
                    import traceback
                    log.error(
                        "[%s] Subpage extraction for %s failed with: %s" %
                        (provider, repr(uri[0]), repr(e)))
                    for line in traceback.format_exc().split("\n"):
                        log.debug(line)

            ret = (name, info_hash, torrent, size, seeds, peers)
            q.put_nowait(ret)

    if not dom:
        return

    for item in eval(row_search):
        if not item:
            continue
        name = eval(name_search) if name_search else ""
        torrent = eval(torrent_search) if torrent_search else ""
        size = eval(size_search) if size_search else ""
        seeds = eval(seeds_search) if seeds_search else ""
        peers = eval(peers_search) if peers_search else ""
        info_hash = eval(info_hash_search) if info_hash_search else ""
        referer = eval(referer_search) if referer_search else ""

        if 'magnet:?' in torrent:
            torrent = torrent[torrent.find('magnet:?'):]

        # Pass client cookies with torrent if private
        if definition['private'] and not torrent.startswith('magnet'):
            user_agent = USER_AGENT

            if client.passkey:
                torrent = torrent.replace('PASSKEY', client.passkey)
            elif client.token:
                headers = {
                    'Authorization': client.token,
                    'User-Agent': user_agent
                }
                log.debug("[%s] Appending headers: %s" %
                          (provider, repr(headers)))
                torrent = append_headers(torrent, headers)
                log.debug("[%s] Torrent with headers: %s" %
                          (provider, repr(torrent)))
            else:
                log.debug("[%s] Cookies: %s" %
                          (provider, repr(client.cookies())))
                parsed_url = urlparse(definition['root_url'])
                cookie_domain = '{uri.netloc}'.format(uri=parsed_url)
                cookie_domain = re.sub(r'www\d*\.', '', cookie_domain)
                cookies = []
                for cookie in client._cookies:
                    if cookie_domain in cookie.domain:
                        cookies.append(cookie)
                headers = {'User-Agent': user_agent}
                if cookies:
                    headers['Cookie'] = ";".join(
                        ["%s=%s" % (c.name, c.value) for c in cookies])
                torrent = append_headers(torrent, headers)
                log.debug("[%s] Torrent with headers: %s" %
                          (provider, repr(torrent)))

        if name and torrent and needs_subpage and not torrent.startswith(
                'magnet'):
            if not torrent.startswith('http'):
                torrent = definition['root_url'] + torrent.encode('utf-8')
            t = Thread(target=extract_subpage,
                       args=(q, name, torrent, size, seeds, peers, info_hash,
                             referer))
            threads.append(t)
        else:
            yield (name, info_hash, torrent, size, seeds, peers)

    if needs_subpage:
        log.debug("[%s] Starting subpage threads..." % provider)
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        log.debug("[%s] Threads returned: %s" % (provider, repr(threads)))

        for i in range(q.qsize()):
            ret = q.get_nowait()
            log.debug("[%s] Queue %d got: %s" % (provider, i, repr(ret)))
            yield ret
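
The uri|Header=value convention these examples rely on is implemented by append_headers elsewhere in the project; the sketch below only illustrates the shape suggested by how cookies are glued onto URIs with '|' here, and is an assumption rather than the project's actual implementation.

def append_headers_sketch(uri, headers):
    # Assumed convention: headers ride along after a '|' separator so a
    # downstream resolver can split them back off the torrent URI.
    return uri + '|' + '&'.join('%s=%s' % (k, v) for k, v in headers.items())

# e.g. append_headers_sketch('https://example.org/t/1.torrent', {'User-Agent': USER_AGENT})
# -> 'https://example.org/t/1.torrent|User-Agent=...'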
Example #3
def extract_torrents(provider, client):
    """ Main torrent extraction generator for non-API based providers

    Args:
        provider  (str): Provider ID
        client (Client): Client class instance

    Yields:
        tuple: A torrent result
    """
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))
    log.debug("Extracting torrents from %s using definitions: %s" %
              (provider, repr(definition)))

    if not client.content:
        return

    dom = Html().feed(client.content)

    row_search = "dom." + definition['parser']['row']
    name_search = definition['parser']['name']
    torrent_search = definition['parser']['torrent']
    info_hash_search = definition['parser']['infohash']
    size_search = definition['parser']['size']
    seeds_search = definition['parser']['seeds']
    peers_search = definition['parser']['peers']

    log.debug("[%s] Parser: %s" % (provider, repr(definition['parser'])))

    q = Queue()
    threads = []
    needs_subpage = 'subpage' in definition and definition['subpage']

    if needs_subpage:

        def extract_subpage(q, name, torrent, size, seeds, peers, info_hash):
            try:
                log.debug("[%s] Getting subpage at %s" %
                          (provider, repr(torrent)))
            except Exception as e:
                import traceback
                log.error("[%s] Subpage logging failed with: %s" %
                          (provider, repr(e)))
                for line in traceback.format_exc().split("\n"):
                    log.debug(line)

            # New client instance, otherwise it's race conditions all over the place
            subclient = Client()
            subclient.passkey = client.passkey

            if get_setting("use_cloudhole", bool):
                subclient.clearance = get_setting('clearance')
                subclient.user_agent = get_setting('user_agent')

            uri = torrent.split('|')  # Split cookies for private trackers
            subclient.open(uri[0].encode('utf-8'))

            if 'bittorrent' in subclient.headers.get('content-type', ''):
                log.debug('[%s] bittorrent content-type for %s' %
                          (provider, repr(torrent)))
                if len(uri) > 1:  # Stick back cookies if needed
                    torrent = '%s|%s' % (torrent, uri[1])
            else:
                try:
                    torrent = extract_from_page(provider, subclient.content)
                    if torrent and not torrent.startswith('magnet') and len(
                            uri) > 1:  # Stick back cookies if needed
                        torrent = '%s|%s' % (torrent, uri[1])
                except Exception as e:
                    import traceback
                    log.error(
                        "[%s] Subpage extraction for %s failed with: %s" %
                        (provider, repr(uri[0]), repr(e)))
                    for line in traceback.format_exc().split("\n"):
                        log.debug(line)

            ret = (name, info_hash, torrent, size, seeds, peers)
            q.put_nowait(ret)

    if not dom:
        return

    if get_setting("use_debug_parser", bool):
        log.debug(
            "[%s] Parser debug | Page content: %s" %
            (provider, client.content.replace('\r', '').replace('\n', '')))
        log.debug("[%s] Parser debug | Matched %d items for '%s' query '%s'" %
                  (provider, len(eval(row_search)), 'row', row_search))

    for item in eval(row_search):
        if get_setting("use_debug_parser", bool):
            item_str = str(item)
            log.debug(
                "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                % (provider, 'row', row_search, item_str.replace(
                    '\r', '').replace('\n', '')))

        if not item:
            continue
        name = eval(name_search)
        torrent = eval(torrent_search) if torrent_search else ""
        size = eval(size_search) if size_search else ""
        seeds = eval(seeds_search) if seeds_search else ""
        peers = eval(peers_search) if peers_search else ""
        info_hash = eval(info_hash_search) if info_hash_search else ""

        if get_setting("use_debug_parser", bool):
            log.debug(
                "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                % (provider, 'name', name_search, name))
            log.debug(
                "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                % (provider, 'torrent', torrent_search, torrent))
            log.debug(
                "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                % (provider, 'size', size_search, size))
            log.debug(
                "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                % (provider, 'seeds', seeds_search, seeds))
            log.debug(
                "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                % (provider, 'peers', peers_search, peers))
            log.debug(
                "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                % (provider, 'info_hash', info_hash_search, info_hash))

        # Pass client cookies with torrent if private
        if (definition['private'] or get_setting(
                "use_cloudhole", bool)) and not torrent.startswith('magnet'):
            user_agent = USER_AGENT
            if get_setting("use_cloudhole", bool):
                user_agent = get_setting("user_agent")

            if client.passkey:
                torrent = torrent.replace('PASSKEY', client.passkey)
            elif client.token:
                headers = {
                    'Authorization': client.token,
                    'User-Agent': user_agent
                }
                log.debug("[%s] Appending headers: %s" %
                          (provider, repr(headers)))
                torrent = append_headers(torrent, headers)
                log.debug("[%s] Torrent with headers: %s" %
                          (provider, repr(torrent)))
            else:
                log.debug("[%s] Cookies: %s" %
                          (provider, repr(client.cookies())))
                parsed_url = urlparse(definition['root_url'])
                cookie_domain = '{uri.netloc}'.format(uri=parsed_url).replace(
                    'www.', '')
                cookies = []
                log.debug("[%s] cookie_domain: %s" % (provider, cookie_domain))
                for cookie in client._cookies:
                    log.debug(
                        "[%s] cookie for domain: %s (%s=%s)" %
                        (provider, cookie.domain, cookie.name, cookie.value))
                    if cookie_domain in cookie.domain:
                        cookies.append(cookie)
                if cookies:
                    headers = {
                        'Cookie': ";".join(["%s=%s" % (c.name, c.value) for c in cookies]),
                        'User-Agent': user_agent
                    }
                    log.debug("[%s] Appending headers: %s" %
                              (provider, repr(headers)))
                    torrent = append_headers(torrent, headers)
                    log.debug("[%s] Torrent with headers: %s" %
                              (provider, repr(torrent)))

        if name and torrent and needs_subpage:
            if not torrent.startswith('http'):
                torrent = definition['root_url'] + torrent.encode('utf-8')
            t = Thread(target=extract_subpage,
                       args=(q, name, torrent, size, seeds, peers, info_hash))
            threads.append(t)
        else:
            yield (name, info_hash, torrent, size, seeds, peers)

    if needs_subpage:
        log.debug("[%s] Starting subpage threads..." % provider)
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        log.debug("[%s] Threads returned: %s" % (provider, repr(threads)))

        for i in range(q.qsize()):
            ret = q.get_nowait()
            log.debug("[%s] Queue %d got: %s" % (provider, i, repr(ret)))
            yield ret
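
The cookie scoping in the else branch above reduces to stripping a leading www/www2 label from the tracker host and keeping only cookies set for that bare domain; a standalone sketch with placeholder values:

import re
from urlparse import urlparse  # urllib.parse on Python 3

parsed_url = urlparse('https://www2.example-tracker.org/search')  # placeholder URL
cookie_domain = re.sub(r'www\d*\.', '', parsed_url.netloc)  # -> 'example-tracker.org'
# Any cookie whose .domain contains cookie_domain would then be forwarded.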
Example #4

    def search(self, searchTerm, category=None):
        results = []
        # Login
        cookies = self.__login()
        # Prepare cookie string for resolving torrent links
        cookieStr = ''
        for cookie in cookies:
            if cookie.name == 'auth_token' or cookie.name == '_partis16':
                cookieStr += cookie.name + '=' + cookie.value + ';'
        # First request to /torrent/show, otherwise it forces a redirect
        showResponse, _ = self.__request(_Partis__SHOW_URL, {}, cookies, {},
                                         None, 'GET')
        # Do actual search
        categories = ''
        if category:
            categories = ','.join(_Partis__CATEGORIES[category])
        params = {
            'keyword': searchTerm,
            'category': categories,
            'offset': '0',
            'option': '0',
            'ns': 'true',
            'rnd': '0.'
        }
        searchResponse, _ = self.__request(
            _Partis__SEARCH_URL, params, cookies,
            {'X-Requested-With': 'XMLHttpRequest'}, None, 'GET')
        # Fix HTML before parsing
        searchHtml = searchResponse.read().replace('/></div>', '></div>')
        # Parse torrents
        searchDom = Html().feed(searchHtml)
        listeks = searchDom.find_all(tag='div', select=('class', 'listek'))
        for listek in listeks:
            # Parse basic info
            tId = listek(tag='div', select=('class', 'likona'), attribute='id')
            listeklink = listek.find_once(tag='div',
                                          select=('class', 'listeklink'))
            tName = listeklink(tag='a')
            # Get download link
            data3t = listek.find_once(tag='div', select=('class', 'data3t'))
            tDldLink = data3t(tag='a', attribute='href')
            size = listek(tag='div', select=('class', 'datat'), order=1)
            try:
                seeders = int(
                    listek(tag='div', select=('class', 'datat'), order=2))
            except Exception:
                seeders = 0
            try:
                peers = int(
                    listek(tag='div', select=('class', 'datat'), order=3))
            except Exception:
                peers = 0

            results.append({
                "name": tName,
                "uri": _Partis__BASE_URL + tDldLink + '|Cookie=' + cookieStr,
                "info_hash": 'PARTIS_' + tId,
                "size": size,
                "provider": _Partis__NAME,
                "icon": 'logo.png',
                "seeds": seeders,
                "peers": peers,
                "is_private": True,
                "Multi": False
            })

        return results
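
A hypothetical call site (the surrounding Partis class internals and category keys are not shown here, so this only illustrates the return shape):

partis = Partis()
for result in partis.search('ubuntu'):
    print('%s (%d seeds / %d peers)' % (result['name'], result['seeds'], result['peers']))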