Python url_fix примеры использования

Язык программирования: Python

Пространство имен/Пакет: lazylibrarian.formatter

Метод/Функция: url_fix

Примеров на hotexamples.com: 16

Python url_fix - 16 примеров найдено. Это лучшие примеры Python кода для lazylibrarian.formatter.url_fix, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

0

Показать файл

Файл: torrentparser.py Проект: knobunc/LazyLibrarian

def TDL(book=None, test=False):
    errmsg = ''
    provider = "torrentdownloads"
    host = lazylibrarian.CONFIG['TDL_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host)

    params = {
        "type": "search",
        "cid": "2",
        "search": book['searchterm']
    }
    searchURL = providerurl + "/rss.xml?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = item['title']
                    seeders = int(item['seeders'].replace(',', ''))
                    link = item['link']
                    size = int(item['size'])
                    url = None

                    try:
                        pubdate = item['published']
                    except KeyError:
                        pubdate = None

                    if link and minimumseeders < seeders:
                        # no point requesting the magnet link if not enough seeders
                        # TDL gives us a relative link
                        result, success = fetchURL(providerurl+link)
                        if success:
                            new_soup = BeautifulSoup(result, 'html5lib')
                            for link in new_soup.find_all('a'):
                                output = link.get('href')
                                if output and output.startswith('magnet'):
                                    url = output
                                    break

                        if not url or not title:
                            logger.debug('Missing url or title')
                        else:
                            res = {
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'magnet',
                                'priority': lazylibrarian.CONFIG['TDL_DLPRIORITY']
                            }
                            if pubdate:
                                res['tor_date'] = pubdate
                            logger.debug('Found %s. Size: %s' % (title, size))
                            results.append(res)
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))

    return results, errmsg

Пример #2

0

Показать файл

Файл: torrentparser.py Проект: knobunc/LazyLibrarian

def ZOO(book=None, test=False):
    errmsg = ''
    provider = "zooqle"
    host = lazylibrarian.CONFIG['ZOO_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/search")

    params = {
        "q": book['searchterm'],
        "category": "books",
        "fmt": "rss"
    }
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    seeders = int(item['torrent_seeds'].replace(',', ''))
                    link = item['links'][1]['href']
                    size = int(item['links'][1]['length'])
                    magnet = item['torrent_magneturi']

                    url = None
                    mode = 'torrent'
                    if link:
                        url = link
                        mode = 'torrent'
                    if magnet:
                        if not url or (url and lazylibrarian.CONFIG['PREFER_MAGNET']):
                            url = magnet
                            mode = 'magnet'

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': mode,
                            'priority': lazylibrarian.CONFIG['ZOO_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # looks like zooqle has ip based access limits
                        logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))

    return results, errmsg

Пример #3

0

Показать файл

Файл: torrentparser.py Проект: knobunc/LazyLibrarian

def LIME(book=None, test=False):
    errmsg = ''
    provider = "Limetorrent"
    host = lazylibrarian.CONFIG['LIME_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    params = {
        "q": book['searchterm']
    }
    providerurl = url_fix(host + "/searchrss/other")
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    try:
                        seeders = item['description']
                        seeders = int(seeders.split('Seeds:')[1].split(' ,')[0].replace(',', '').strip())
                    except (IndexError, ValueError):
                        seeders = 0

                    size = item['size']
                    try:
                        size = int(size)
                    except ValueError:
                        size = 0

                    try:
                        pubdate = item['published']
                    except KeyError:
                        pubdate = None

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['url']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        res = {
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'torrent',
                            'priority': lazylibrarian.CONFIG['LIME_DLPRIORITY']
                        }
                        if pubdate:
                            res['tor_date'] = pubdate
                        results.append(res)
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # may have ip based access limits
                        logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))

    return results, errmsg

Пример #4

0

Показать файл

Файл: torrentparser.py Проект: knobunc/LazyLibrarian

def TPB(book=None, test=False):
    errmsg = ''
    provider = "TPB"
    host = lazylibrarian.CONFIG['TPB_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/s/?")

    cat = 0  # 601=ebooks, 102=audiobooks, 0=all, no mag category
    if 'library' in book:
        if book['library'] == 'AudioBook':
            cat = 102
        elif book['library'] == 'eBook':
            cat = 601
        elif book['library'] == 'magazine':
            cat = 0

    sterm = makeUnicode(book['searchterm'])

    page = 0
    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    next_page = True

    while next_page:

        params = {
            "q": book['searchterm'],
            "category": cat,
            "page": page,
            "orderby": "99"
        }

        searchURL = providerurl + "?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)

        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching data from %s: %s' % (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            soup = BeautifulSoup(result, 'html5lib')
            # tpb uses a named table
            table = soup.find('table', id='searchResult')
            if table:
                rows = table.find_all('tr')
            else:
                rows = []

            if len(rows) > 1:
                rows = rows[1:]  # first row is headers
            for row in rows:
                td = row.find_all('td')
                if len(td) > 2:
                    try:
                        new_soup = BeautifulSoup(str(td[1]), 'html5lib')
                        link = new_soup.find("a")
                        magnet = link.get("href")
                        title = link.text
                        size = td[1].text.split(', Size ')[1].split('iB')[0]
                        size = size.replace('&nbsp;', '')
                        size = size_in_bytes(size)
                        try:
                            seeders = int(td[2].text.replace(',', ''))
                        except ValueError:
                            seeders = 0

                        if minimumseeders < seeders:
                            # no point in asking for magnet link if not enough seeders
                            magurl = '%s/%s' % (host, magnet)
                            result, success = fetchURL(magurl)
                            if not success:
                                logger.debug('Error fetching url %s, %s' % (magurl, result))
                            else:
                                magnet = None
                                new_soup = BeautifulSoup(result, 'html5lib')
                                for link in new_soup.find_all('a'):
                                    output = link.get('href')
                                    if output and output.startswith('magnet'):
                                        magnet = output
                                        break
                            if not magnet or not title:
                                logger.debug('Missing magnet or title')
                            else:
                                results.append({
                                    'bookid': book['bookid'],
                                    'tor_prov': provider,
                                    'tor_title': title,
                                    'tor_url': magnet,
                                    'tor_size': str(size),
                                    'tor_type': 'magnet',
                                    'priority': lazylibrarian.CONFIG['TPB_DLPRIORITY']
                                })
                                logger.debug('Found %s. Size: %s' % (title, size))
                                next_page = True
                        else:
                            logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                    except Exception as e:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg

Пример #5

0

Показать файл

Файл: torrentparser.py Проект: knobunc/LazyLibrarian

def EXTRA(book=None, test=False):
    errmsg = ''
    provider = "Extratorrent"
    host = lazylibrarian.CONFIG['EXTRA_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/rss")

    params = {
        "type": "search",
        "s_cat": "2",
        "search": book['searchterm']
    }
    searchURL = providerurl + "/?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])

                    try:
                        seeders = int(item['seeders'].replace(',', ''))
                    except ValueError:
                        seeders = 0

                    try:
                        size = int(item['size'])
                    except ValueError:
                        size = 0

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['href']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'torrent',
                            'priority': lazylibrarian.CONFIG['EXTRA_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))

    return results, errmsg

Пример #6

0

Показать файл

Файл: torrentparser.py Проект: knobunc/LazyLibrarian

def KAT(book=None, test=False):
    errmsg = ''
    provider = "KAT"
    host = lazylibrarian.CONFIG['KAT_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/usearch/" + quote(book['searchterm']))

    params = {
        "category": "books",
        "field": "seeders",
        "sorder": "desc"
    }
    searchURL = providerurl + "/?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    result, success = fetchURL(searchURL)
    if not success:
        # seems KAT returns 404 if no results, not really an error
        if '404' in result:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, result))
            errmsg = result
        result = False

    if test:
        return success

    results = []

    if result:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
        soup = BeautifulSoup(result, 'html5lib')
        rows = []
        try:
            table = soup.find_all('table')[1]  # un-named table
            if table:
                rows = table.find_all('tr')
        except IndexError:  # no results table in result page
            rows = []

        if len(rows) > 1:
            rows = rows[1:]  # first row is headers

        for row in rows:
            td = row.find_all('td')
            if len(td) > 3:
                try:
                    title = unaccented(td[0].text)
                    # kat can return magnet or torrent or both.
                    magnet = ''
                    url = ''
                    mode = 'torrent'
                    try:
                        magnet = 'magnet' + str(td[0]).split('href="magnet')[1].split('"')[0]
                        mode = 'magnet'
                    except IndexError:
                        pass
                    try:
                        url = 'http' + str(td[0]).split('href="http')[1].split('.torrent?')[0] + '.torrent'
                        mode = 'torrent'
                    except IndexError:
                        pass

                    if not url or (magnet and url and lazylibrarian.CONFIG['PREFER_MAGNET']):
                        url = magnet
                        mode = 'magnet'

                    try:
                        size = str(td[1].text).replace('&nbsp;', '').upper()
                        size = size_in_bytes(size)
                    except ValueError:
                        size = 0
                    try:
                        seeders = int(td[3].text.replace(',', ''))
                    except ValueError:
                        seeders = 0

                    if not url or not title:
                        logger.debug('Missing url or title')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': mode,
                            'priority': lazylibrarian.CONFIG['KAT_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg

Пример #7

0

Показать файл

Файл: torrentparser.py Проект: knobunc/LazyLibrarian

def WWT(book=None, test=False):
    errmsg = ''
    provider = "WorldWideTorrents"
    host = lazylibrarian.CONFIG['WWT_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/torrents-search.php")

    sterm = makeUnicode(book['searchterm'])

    cat = 0  # 0=all, 36=ebooks, 52=mags, 56=audiobooks
    if 'library' in book:
        if book['library'] == 'AudioBook':
            cat = 56
        elif book['library'] == 'eBook':
            cat = 36
        elif book['library'] == 'magazine':
            cat = 52

    page = 0
    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    next_page = True

    while next_page:
        params = {
            "search": book['searchterm'],
            "page": page,
            "cat": cat
        }
        searchURL = providerurl + "/?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # might return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            elif '503' in result:
                logger.warn("Cloudflare bot detection? %s: %s" % (provider, result))
                logger.warn("Try unblocking %s from a browser" % providerurl)
                success = True
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching data from %s: %s' % (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            soup = BeautifulSoup(result, 'html5lib')
            rows = []
            try:
                tables = soup.find_all('table')  # un-named table
                table = tables[2]
                if table:
                    rows = table.find_all('tr')
            except IndexError:  # no results table in result page
                rows = []

            if len(rows) > 1:
                rows = rows[1:]  # first row is headers

            for row in rows:
                td = row.find_all('td')
                if len(td) > 3:
                    try:
                        title = unaccented(td[0].text)
                        # can return magnet or torrent or both.
                        magnet = ''
                        url = ''
                        mode = 'torrent'
                        try:
                            magnet = 'magnet' + str(td[0]).split('href="magnet')[1].split('"')[0]
                            mode = 'magnet'
                        except IndexError:
                            pass
                        try:
                            url = url_fix(host + '/download.php') + \
                                          str(td[0]).split('href="download.php')[1].split('.torrent"')[0] + '.torrent'
                            mode = 'torrent'
                        except IndexError:
                            pass

                        if not url or (magnet and url and lazylibrarian.CONFIG['PREFER_MAGNET']):
                            url = magnet
                            mode = 'magnet'

                        try:
                            size = str(td[1].text).replace('&nbsp;', '').upper()
                            size = size_in_bytes(size)
                        except ValueError:
                            size = 0
                        try:
                            seeders = int(td[2].text.replace(',', ''))
                        except ValueError:
                            seeders = 0

                        if not url or not title:
                            logger.debug('Missing url or title')
                        elif minimumseeders < seeders:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': mode,
                                'priority': lazylibrarian.CONFIG['WWT_DLPRIORITY']
                            })
                            logger.debug('Found %s. Size: %s' % (title, size))
                            next_page = True
                        else:
                            logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                    except Exception as e:
                        logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                        logger.debug('%s: %s' % (provider, traceback.format_exc()))
        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg

Пример #8

0

Показать файл

def TDL(book=None, test=False):
    errmsg = ''
    provider = "torrentdownloads"
    host = lazylibrarian.CONFIG['TDL_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host)

    params = {"type": "search", "cid": "2", "search": book['searchterm']}
    searchURL = providerurl + "/rss.xml?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = item['title']
                    seeders = int(item['seeders'].replace(',', ''))
                    link = item['link']
                    size = int(item['size'])
                    url = None

                    try:
                        pubdate = item['published']
                    except KeyError:
                        pubdate = None

                    if link and minimumseeders < seeders:
                        # no point requesting the magnet link if not enough seeders
                        # TDL gives us a relative link
                        result, success = fetchURL(providerurl + link)
                        if success:
                            new_soup = BeautifulSoup(result, 'html5lib')
                            for link in new_soup.find_all('a'):
                                output = link.get('href')
                                if output and output.startswith('magnet'):
                                    url = output
                                    break

                        if not url or not title:
                            logger.debug('Missing url or title')
                        else:
                            res = {
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'magnet',
                                'priority':
                                lazylibrarian.CONFIG['TDL_DLPRIORITY']
                            }
                            if pubdate:
                                res['tor_date'] = pubdate
                            logger.debug('Found %s. Size: %s' % (title, size))
                            results.append(res)
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" %
                                 (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))

    return results, errmsg

Пример #9

0

Показать файл

Файл: directparser.py Проект: geoffg41/LazyLibrarian

def GEN(book=None, prov=None, test=False):
    errmsg = ''
    provider = "libgen.io"
    if prov is None:
        prov = 'GEN'
    host = lazylibrarian.CONFIG[prov + '_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    search = lazylibrarian.CONFIG[prov + '_SEARCH']
    if not search or not search.endswith('.php'):
        search = 'search.php'
    if 'index.php' not in search and 'search.php' not in search:
        search = 'search.php'
    if search[0] == '/':
        search = search[1:]

    sterm = makeUnicode(book['searchterm'])

    page = 1
    results = []
    next_page = True

    while next_page:
        if 'index.php' in search:
            params = {
                "s": book['searchterm'],
                "f_lang": "All",
                "f_columns": 0,
                "f_ext": "All"
            }
        else:
            params = {
                "view": "simple",
                "open": 0,
                "phrase": 0,
                "column": "def",
                "res": 100,
                "req": book['searchterm']
            }

        if page > 1:
            params['page'] = page

        providerurl = url_fix(host + "/%s" % search)
        searchURL = providerurl + "?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" %
                             (provider, sterm))
                success = True
            elif '111' in result:
                # looks like libgen has ip based access limits
                logger.error(
                    'Access forbidden. Please wait a while before trying %s again.'
                    % provider)
                errmsg = result
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching page data from %s: %s' %
                             (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' %
                         (searchURL, provider))
            try:
                soup = BeautifulSoup(result, 'html5lib')
                rows = []

                try:
                    table = soup.find_all('table')[
                        -1]  # un-named table, last one in page
                    if table:
                        rows = table.find_all('tr')
                except IndexError:  # no results table in result page
                    rows = []

                if len(rows) > 1:  # skip table headers
                    rows = rows[1:]

                for row in rows:
                    author = ''
                    title = ''
                    size = ''
                    extn = ''
                    link = ''
                    td = row.find_all('td')

                    if 'index.php' in search and len(td) > 3:
                        try:
                            author = formatAuthorName(td[0].text)
                            title = td[2].text
                            newsoup = BeautifulSoup(str(td[4]), 'html5lib')
                            data = newsoup.find('a')
                            if data:
                                link = data.get('href')
                                extn = td[4].text.split('(')[0].strip()
                                size = td[4].text.split('(')[1].split(')')[0]
                                size = size.upper()
                        except IndexError as e:
                            logger.debug(
                                'Error parsing libgen index.php results: %s' %
                                str(e))

                    elif 'search.php' in search and len(td) > 8:
                        try:
                            author = formatAuthorName(td[1].text)
                            title = td[2].text
                            size = td[7].text.upper()
                            extn = td[8].text
                            link = ''
                            newsoup = BeautifulSoup(str(td[2]), 'html5lib')
                            for res in newsoup.find_all('a'):
                                output = res.get('href')
                                if 'md5' in output:
                                    link = output
                                    break
                        except IndexError as e:
                            logger.debug(
                                'Error parsing libgen search.php results; %s' %
                                str(e))

                    size = size_in_bytes(size)

                    if link and title:
                        if author:
                            title = author.strip() + ' ' + title.strip()
                        if extn:
                            title = title + '.' + extn

                        if link.startswith('http'):
                            url = redirect_url(host, link)
                        else:
                            if "/index.php?" in link:
                                link = 'md5' + link.split('md5')[1]

                            if "/ads.php?" in link:
                                url = url_fix(host + "/" + link)
                            else:
                                url = url_fix(host + "/ads.php?" + link)

                        bookresult, success = fetchURL(url)
                        if not success:
                            logger.debug(
                                'Error fetching link data from %s: %s' %
                                (provider, bookresult))
                            logger.debug(url)
                            url = None
                        else:
                            url = None
                            try:
                                new_soup = BeautifulSoup(
                                    bookresult, 'html5lib')
                                for link in new_soup.find_all('a'):
                                    output = link.get('href')
                                    if output:
                                        if output.startswith(
                                                'http'
                                        ) and '/get.php' in output:
                                            url = output
                                            break
                                        elif '/get.php' in output:
                                            url = '/get.php' + output.split(
                                                '/get.php')[1]
                                            break
                                        elif '/download/book' in output:
                                            url = '/download/book' + output.split(
                                                '/download/book')[1]
                                            break

                                if url and not url.startswith('http'):
                                    url = url_fix(host + url)
                                else:
                                    url = redirect_url(host, url)
                            except Exception as e:
                                logger.error(
                                    '%s parsing bookresult for %s: %s' %
                                    (type(e).__name__, link, str(e)))
                                url = None

                        if url:
                            results.append({
                                'bookid':
                                book['bookid'],
                                'tor_prov':
                                provider + '/' + search,
                                'tor_title':
                                title,
                                'tor_url':
                                url,
                                'tor_size':
                                str(size),
                                'tor_type':
                                'direct',
                                'priority':
                                lazylibrarian.CONFIG[prov + '_DLPRIORITY']
                            })
                            logger.debug('Found %s, Size %s' % (title, size))
                        next_page = True

            except Exception as e:
                logger.error("An error occurred in the %s parser: %s" %
                             (provider, str(e)))
                logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn(
                'Maximum results page search reached, still more results available'
            )
            next_page = False

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))
    return results, errmsg

Пример #10

0

Показать файл

def ZOO(book=None, test=False):
    errmsg = ''
    provider = "zooqle"
    host = lazylibrarian.CONFIG['ZOO_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/search")

    params = {"q": book['searchterm'], "category": "books", "fmt": "rss"}
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    seeders = int(item['torrent_seeds'].replace(',', ''))
                    link = item['links'][1]['href']
                    size = int(item['links'][1]['length'])
                    magnet = item['torrent_magneturi']

                    url = None
                    mode = 'torrent'
                    if link:
                        url = link
                        mode = 'torrent'
                    if magnet:
                        if not url or (url and
                                       lazylibrarian.CONFIG['PREFER_MAGNET']):
                            url = magnet
                            mode = 'magnet'

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid':
                            book['bookid'],
                            'tor_prov':
                            provider,
                            'tor_title':
                            title,
                            'tor_url':
                            url,
                            'tor_size':
                            str(size),
                            'tor_type':
                            mode,
                            'priority':
                            lazylibrarian.CONFIG['ZOO_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # looks like zooqle has ip based access limits
                        logger.error(
                            'Access forbidden. Please wait a while before trying %s again.'
                            % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" %
                                     (provider, str(e)))
                        logger.debug('%s: %s' %
                                     (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))

    return results, errmsg

Пример #11

0

Показать файл

def LIME(book=None, test=False):
    errmsg = ''
    provider = "Limetorrent"
    host = lazylibrarian.CONFIG['LIME_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    params = {"q": book['searchterm']}
    providerurl = url_fix(host + "/searchrss/other")
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])
                    try:
                        seeders = item['description']
                        seeders = int(
                            seeders.split('Seeds:')[1].split(' ,')[0].replace(
                                ',', '').strip())
                    except (IndexError, ValueError):
                        seeders = 0

                    size = item['size']
                    try:
                        size = int(size)
                    except ValueError:
                        size = 0

                    try:
                        pubdate = item['published']
                    except KeyError:
                        pubdate = None

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['url']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        res = {
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'torrent',
                            'priority': lazylibrarian.CONFIG['LIME_DLPRIORITY']
                        }
                        if pubdate:
                            res['tor_date'] = pubdate
                        results.append(res)
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    if 'forbidden' in str(e).lower():
                        # may have ip based access limits
                        logger.error(
                            'Access forbidden. Please wait a while before trying %s again.'
                            % provider)
                    else:
                        logger.error("An error occurred in the %s parser: %s" %
                                     (provider, str(e)))
                        logger.debug('%s: %s' %
                                     (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))

    return results, errmsg

Пример #12

0

Показать файл

def EXTRA(book=None, test=False):
    errmsg = ''
    provider = "Extratorrent"
    host = lazylibrarian.CONFIG['EXTRA_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/rss")

    params = {"type": "search", "s_cat": "2", "search": book['searchterm']}
    searchURL = providerurl + "/?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        d = feedparser.parse(data)
        if len(d.entries):
            for item in d.entries:
                try:
                    title = unaccented(item['title'])

                    try:
                        seeders = int(item['seeders'].replace(',', ''))
                    except ValueError:
                        seeders = 0

                    try:
                        size = int(item['size'])
                    except ValueError:
                        size = 0

                    url = None
                    for link in item['links']:
                        if 'x-bittorrent' in link['type']:
                            url = link['href']

                    if not url or not title:
                        logger.debug('No url or title found')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid':
                            book['bookid'],
                            'tor_prov':
                            provider,
                            'tor_title':
                            title,
                            'tor_url':
                            url,
                            'tor_size':
                            str(size),
                            'tor_type':
                            'torrent',
                            'priority':
                            lazylibrarian.CONFIG['EXTRA_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))

                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" %
                                 (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))

    return results, errmsg

Пример #13

0

Показать файл

def TPB(book=None, test=False):
    errmsg = ''
    provider = "TPB"
    host = lazylibrarian.CONFIG['TPB_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/s/?")

    cat = 0  # 601=ebooks, 102=audiobooks, 0=all, no mag category
    if 'library' in book:
        if book['library'] == 'AudioBook':
            cat = 102
        elif book['library'] == 'eBook':
            cat = 601
        elif book['library'] == 'magazine':
            cat = 0

    sterm = makeUnicode(book['searchterm'])

    page = 0
    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    next_page = True

    while next_page:

        params = {
            "q": book['searchterm'],
            "category": cat,
            "page": page,
            "orderby": "99"
        }

        searchURL = providerurl + "?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)

        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" %
                             (provider, sterm))
                success = True
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching data from %s: %s' %
                             (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' %
                         (searchURL, provider))
            soup = BeautifulSoup(result, 'html5lib')
            # tpb uses a named table
            table = soup.find('table', id='searchResult')
            if table:
                rows = table.find_all('tr')
            else:
                rows = []

            if len(rows) > 1:
                rows = rows[1:]  # first row is headers
            for row in rows:
                td = row.find_all('td')
                if len(td) > 2:
                    try:
                        new_soup = BeautifulSoup(str(td[1]), 'html5lib')
                        link = new_soup.find("a")
                        magnet = link.get("href")
                        title = link.text
                        size = td[1].text.split(', Size ')[1].split('iB')[0]
                        size = size.replace('&nbsp;', '')
                        size = size_in_bytes(size)
                        try:
                            seeders = int(td[2].text.replace(',', ''))
                        except ValueError:
                            seeders = 0

                        if minimumseeders < seeders:
                            # no point in asking for magnet link if not enough seeders
                            magurl = '%s/%s' % (host, magnet)
                            result, success = fetchURL(magurl)
                            if not success:
                                logger.debug('Error fetching url %s, %s' %
                                             (magurl, result))
                            else:
                                magnet = None
                                new_soup = BeautifulSoup(result, 'html5lib')
                                for link in new_soup.find_all('a'):
                                    output = link.get('href')
                                    if output and output.startswith('magnet'):
                                        magnet = output
                                        break
                            if not magnet or not title:
                                logger.debug('Missing magnet or title')
                            else:
                                results.append({
                                    'bookid':
                                    book['bookid'],
                                    'tor_prov':
                                    provider,
                                    'tor_title':
                                    title,
                                    'tor_url':
                                    magnet,
                                    'tor_size':
                                    str(size),
                                    'tor_type':
                                    'magnet',
                                    'priority':
                                    lazylibrarian.CONFIG['TPB_DLPRIORITY']
                                })
                                logger.debug('Found %s. Size: %s' %
                                             (title, size))
                                next_page = True
                        else:
                            logger.debug('Found %s but %s seeder%s' %
                                         (title, seeders, plural(seeders)))
                    except Exception as e:
                        logger.error("An error occurred in the %s parser: %s" %
                                     (provider, str(e)))
                        logger.debug('%s: %s' %
                                     (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn(
                'Maximum results page search reached, still more results available'
            )
            next_page = False

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))
    return results, errmsg

Пример #14

0

Показать файл

def WWT(book=None, test=False):
    errmsg = ''
    provider = "WorldWideTorrents"
    host = lazylibrarian.CONFIG['WWT_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/torrents-search.php")

    sterm = makeUnicode(book['searchterm'])

    cat = 0  # 0=all, 36=ebooks, 52=mags, 56=audiobooks
    if 'library' in book:
        if book['library'] == 'AudioBook':
            cat = 56
        elif book['library'] == 'eBook':
            cat = 36
        elif book['library'] == 'magazine':
            cat = 52

    page = 0
    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    next_page = True

    while next_page:
        params = {"search": book['searchterm'], "page": page, "cat": cat}
        searchURL = providerurl + "/?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # might return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" %
                             (provider, sterm))
                success = True
            elif '503' in result:
                logger.warn("Cloudflare bot detection? %s: %s" %
                            (provider, result))
                logger.warn("Try unblocking %s from a browser" % providerurl)
                success = True
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching data from %s: %s' %
                             (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' %
                         (searchURL, provider))
            soup = BeautifulSoup(result, 'html5lib')
            rows = []
            try:
                tables = soup.find_all('table')  # un-named table
                table = tables[2]
                if table:
                    rows = table.find_all('tr')
            except IndexError:  # no results table in result page
                rows = []

            if len(rows) > 1:
                rows = rows[1:]  # first row is headers

            for row in rows:
                td = row.find_all('td')
                if len(td) > 3:
                    try:
                        title = unaccented(td[0].text)
                        # can return magnet or torrent or both.
                        magnet = ''
                        url = ''
                        mode = 'torrent'
                        try:
                            magnet = 'magnet' + str(
                                td[0]).split('href="magnet')[1].split('"')[0]
                            mode = 'magnet'
                        except IndexError:
                            pass
                        try:
                            url = url_fix(host + '/download.php') + \
                                          str(td[0]).split('href="download.php')[1].split('.torrent"')[0] + '.torrent'
                            mode = 'torrent'
                        except IndexError:
                            pass

                        if not url or (magnet and url and
                                       lazylibrarian.CONFIG['PREFER_MAGNET']):
                            url = magnet
                            mode = 'magnet'

                        try:
                            size = str(td[1].text).replace('&nbsp;',
                                                           '').upper()
                            size = size_in_bytes(size)
                        except ValueError:
                            size = 0
                        try:
                            seeders = int(td[2].text.replace(',', ''))
                        except ValueError:
                            seeders = 0

                        if not url or not title:
                            logger.debug('Missing url or title')
                        elif minimumseeders < seeders:
                            results.append({
                                'bookid':
                                book['bookid'],
                                'tor_prov':
                                provider,
                                'tor_title':
                                title,
                                'tor_url':
                                url,
                                'tor_size':
                                str(size),
                                'tor_type':
                                mode,
                                'priority':
                                lazylibrarian.CONFIG['WWT_DLPRIORITY']
                            })
                            logger.debug('Found %s. Size: %s' % (title, size))
                            next_page = True
                        else:
                            logger.debug('Found %s but %s seeder%s' %
                                         (title, seeders, plural(seeders)))
                    except Exception as e:
                        logger.error("An error occurred in the %s parser: %s" %
                                     (provider, str(e)))
                        logger.debug('%s: %s' %
                                     (provider, traceback.format_exc()))
        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn(
                'Maximum results page search reached, still more results available'
            )
            next_page = False

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))
    return results, errmsg

Пример #15

0

Показать файл

def KAT(book=None, test=False):
    errmsg = ''
    provider = "KAT"
    host = lazylibrarian.CONFIG['KAT_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/usearch/" + quote(book['searchterm']))

    params = {"category": "books", "field": "seeders", "sorder": "desc"}
    searchURL = providerurl + "/?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    result, success = fetchURL(searchURL)
    if not success:
        # seems KAT returns 404 if no results, not really an error
        if '404' in result:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' %
                         (provider, result))
            errmsg = result
        result = False

    if test:
        return success

    results = []

    if result:
        logger.debug('Parsing results from <a href="%s">%s</a>' %
                     (searchURL, provider))
        minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
        soup = BeautifulSoup(result, 'html5lib')
        rows = []
        try:
            table = soup.find_all('table')[1]  # un-named table
            if table:
                rows = table.find_all('tr')
        except IndexError:  # no results table in result page
            rows = []

        if len(rows) > 1:
            rows = rows[1:]  # first row is headers

        for row in rows:
            td = row.find_all('td')
            if len(td) > 3:
                try:
                    title = unaccented(td[0].text)
                    # kat can return magnet or torrent or both.
                    magnet = ''
                    url = ''
                    mode = 'torrent'
                    try:
                        magnet = 'magnet' + str(
                            td[0]).split('href="magnet')[1].split('"')[0]
                        mode = 'magnet'
                    except IndexError:
                        pass
                    try:
                        url = 'http' + str(td[0]).split('href="http')[1].split(
                            '.torrent?')[0] + '.torrent'
                        mode = 'torrent'
                    except IndexError:
                        pass

                    if not url or (magnet and url
                                   and lazylibrarian.CONFIG['PREFER_MAGNET']):
                        url = magnet
                        mode = 'magnet'

                    try:
                        size = str(td[1].text).replace('&nbsp;', '').upper()
                        size = size_in_bytes(size)
                    except ValueError:
                        size = 0
                    try:
                        seeders = int(td[3].text.replace(',', ''))
                    except ValueError:
                        seeders = 0

                    if not url or not title:
                        logger.debug('Missing url or title')
                    elif minimumseeders < seeders:
                        results.append({
                            'bookid':
                            book['bookid'],
                            'tor_prov':
                            provider,
                            'tor_title':
                            title,
                            'tor_url':
                            url,
                            'tor_size':
                            str(size),
                            'tor_type':
                            mode,
                            'priority':
                            lazylibrarian.CONFIG['KAT_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                    else:
                        logger.debug('Found %s but %s seeder%s' %
                                     (title, seeders, plural(seeders)))
                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" %
                                 (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" %
                 (len(results), plural(len(results)), provider, sterm))
    return results, errmsg

Пример #16

0

Показать файл

Файл: directparser.py Проект: DobyTang/LazyLibrarian

def GEN(book=None, prov=None, test=False):
    errmsg = ''
    provider = "libgen.io"
    if not prov:
        prov = 'GEN'
    host = lazylibrarian.CONFIG[prov + '_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    search = lazylibrarian.CONFIG[prov + '_SEARCH']
    if not search or not search.endswith('.php'):
        search = 'search.php'
    if 'index.php' not in search and 'search.php' not in search:
        search = 'search.php'
    if search[0] == '/':
        search = search[1:]

    sterm = makeUnicode(book['searchterm'])

    page = 1
    results = []
    next_page = True

    while next_page:
        if 'index.php' in search:
            params = {
                "s": book['searchterm'],
                "f_lang": "All",
                "f_columns": 0,
                "f_ext": "All"
            }
        else:
            params = {
                "view": "simple",
                "open": 0,
                "phrase": 0,
                "column": "def",
                "res": 100,
                "req": book['searchterm']
            }

        if page > 1:
            params['page'] = page

        providerurl = url_fix(host + "/%s" % search)
        searchURL = providerurl + "?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            elif '111' in result:
                # looks like libgen has ip based access limits
                logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                errmsg = result
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching page data from %s: %s' % (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            try:
                soup = BeautifulSoup(result, 'html5lib')
                rows = []

                try:
                    table = soup.find_all('table', rules='rows')[-1]  # the last table with rules=rows
                    if table:
                        rows = table.find_all('tr')

                except IndexError:  # no results table in result page
                    rows = []

                if len(rows) > 1:  # skip table headers
                    rows = rows[1:]

                for row in rows:
                    author = ''
                    title = ''
                    size = ''
                    extn = ''
                    link = ''
                    td = row.find_all('td')

                    if 'index.php' in search and len(td) > 3:
                        # Foreign fiction
                        try:
                            author = formatAuthorName(td[0].text)
                            title = td[2].text
                            newsoup = BeautifulSoup(str(td[4]), 'html5lib')
                            data = newsoup.find('a')
                            if data:
                                link = data.get('href')
                                extn = td[4].text.split('(')[0].strip()
                                size = td[4].text.split('(')[1].split(')')[0]
                                size = size.upper()
                        except IndexError as e:
                            logger.debug('Error parsing libgen index.php results: %s' % str(e))

                    elif 'search.php' in search and len(td) > 8:
                        # Non-fiction
                        try:
                            author = formatAuthorName(td[1].text)
                            title = td[2].text
                            size = td[7].text.upper()
                            extn = td[8].text
                            link = ''
                            newsoup = BeautifulSoup(str(td[2]), 'html5lib')
                            for res in newsoup.find_all('a'):
                                output = res.get('href')
                                if 'md5' in output:
                                    link = output
                                    break
                        except IndexError as e:
                            logger.debug('Error parsing libgen search.php results; %s' % str(e))

                    size = size_in_bytes(size)

                    if link and title:
                        if author:
                            title = author.strip() + ' ' + title.strip()
                        if extn:
                            title = title + '.' + extn

                        if link.startswith('http'):
                            url = redirect_url(host, link)
                        else:
                            if "/index.php?" in link:
                                link = 'md5' + link.split('md5')[1]

                            if "/ads.php?" in link:
                                url = url_fix(host + "/" + link)
                            else:
                                url = url_fix(host + "/ads.php?" + link)

                        bookresult, success = fetchURL(url)
                        if not success:
                            logger.debug('Error fetching link data from %s: %s' % (provider, bookresult))
                            logger.debug(url)
                            url = None
                        else:
                            url = None
                            try:
                                new_soup = BeautifulSoup(bookresult, 'html5lib')
                                for link in new_soup.find_all('a'):
                                    output = link.get('href')
                                    if output:
                                        if output.startswith('http') and '/get.php' in output:
                                            url = output
                                            break
                                        elif '/get.php' in output:
                                            url = '/get.php' + output.split('/get.php')[1]
                                            break
                                        elif '/download/book' in output:
                                            url = '/download/book' + output.split('/download/book')[1]
                                            break

                                if url and not url.startswith('http'):
                                    url = url_fix(host + url)
                                else:
                                    url = redirect_url(host, url)
                            except Exception as e:
                                logger.error('%s parsing bookresult for %s: %s' % (type(e).__name__, link, str(e)))
                                url = None

                        if url:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider + '/' + search,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'direct',
                                'priority': lazylibrarian.CONFIG[prov + '_DLPRIORITY']
                            })
                            logger.debug('Found %s, Size %s' % (title, size))
                        next_page = True

            except Exception as e:
                logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg