def TDL(book=None, test=False):
    """Query the torrentdownloads RSS search and collect magnet results.

    ``book`` is indexed for 'searchterm' and 'bookid', so it must be supplied.
    With ``test`` set, the function stops after the feed request and returns
    that request's success flag.  Otherwise it returns ``(results, errmsg)``:
    a list of result dicts and the fetch error text ('' when there was none).
    """
    errmsg = ''
    provider = "torrentdownloads"
    host = lazylibrarian.CONFIG['TDL_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host)

    query = urlencode({
        "type": "search",
        "cid": "2",
        "search": book['searchterm'],
    })
    searchURL = providerurl + "/rss.xml?" + query
    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        if '404' in data:
            # a 404 is treated as "no results", not as a failure
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1

    entries = []
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        entries = feedparser.parse(data).entries

    for entry in entries:
        try:
            title = entry['title']
            seeders = int(entry['seeders'].replace(',', ''))
            page_link = entry['link']
            size = int(entry['size'])
            try:
                pubdate = entry['published']
            except KeyError:
                pubdate = None

            if not page_link or seeders <= minimumseeders:
                # not worth requesting the magnet page for this entry
                logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                continue

            # the feed link is relative; the magnet is scraped from that page
            url = None
            page_html, fetched = fetchURL(providerurl + page_link)
            if fetched:
                for anchor in BeautifulSoup(page_html, 'html5lib').find_all('a'):
                    href = anchor.get('href')
                    if href and href.startswith('magnet'):
                        url = href
                        break

            if not url or not title:
                logger.debug('Missing url or title')
                continue

            res = {
                'bookid': book['bookid'],
                'tor_prov': provider,
                'tor_title': title,
                'tor_url': url,
                'tor_size': str(size),
                'tor_type': 'magnet',
                'priority': lazylibrarian.CONFIG['TDL_DLPRIORITY']
            }
            if pubdate:
                res['tor_date'] = pubdate
            logger.debug('Found %s. Size: %s' % (title, size))
            results.append(res)

        except Exception as e:
            logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
            logger.debug('%s: %s' % (provider, traceback.format_exc()))

    found = len(results)
    logger.debug("Found %i result%s from %s for %s" % (found, plural(found), provider, sterm))
    return results, errmsg
def ZOO(book=None, test=False):
    """Query the zooqle RSS search and collect torrent or magnet results.

    ``book`` is indexed for 'searchterm' and 'bookid', so it must be supplied.
    With ``test`` set, only the success flag of the feed request is returned.
    Otherwise returns ``(results, errmsg)``.
    """
    errmsg = ''
    provider = "zooqle"
    host = lazylibrarian.CONFIG['ZOO_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host + "/search")

    query = urlencode({
        "q": book['searchterm'],
        "category": "books",
        "fmt": "rss",
    })
    searchURL = providerurl + "?" + query
    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        if '404' in data:
            # a 404 is treated as "no results", not as a failure
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1

    entries = []
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        entries = feedparser.parse(data).entries

    for entry in entries:
        try:
            title = unaccented(entry['title'])
            seeders = int(entry['torrent_seeds'].replace(',', ''))
            torrent_link = entry['links'][1]['href']
            size = int(entry['links'][1]['length'])
            magnet = entry['torrent_magneturi']

            # start from the torrent link; switch to the magnet when there is
            # no torrent link or the user prefers magnets
            url = torrent_link if torrent_link else None
            mode = 'torrent'
            if magnet and (not url or lazylibrarian.CONFIG['PREFER_MAGNET']):
                url = magnet
                mode = 'magnet'

            if not url or not title:
                logger.debug('No url or title found')
            elif seeders > minimumseeders:
                results.append({
                    'bookid': book['bookid'],
                    'tor_prov': provider,
                    'tor_title': title,
                    'tor_url': url,
                    'tor_size': str(size),
                    'tor_type': mode,
                    'priority': lazylibrarian.CONFIG['ZOO_DLPRIORITY']
                })
                logger.debug('Found %s. Size: %s' % (title, size))
            else:
                logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

        except Exception as e:
            if 'forbidden' in str(e).lower():
                # looks like zooqle has ip based access limits
                logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
            else:
                logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                logger.debug('%s: %s' % (provider, traceback.format_exc()))

    found = len(results)
    logger.debug("Found %i result%s from %s for %s" % (found, plural(found), provider, sterm))
    return results, errmsg
def LIME(book=None, test=False):
    """Query the Limetorrent RSS search and collect torrent results.

    ``book`` is indexed for 'searchterm' and 'bookid', so it must be supplied.
    With ``test`` set, only the success flag of the feed request is returned.
    Otherwise returns ``(results, errmsg)``: a list of result dicts and the
    fetch error text ('' when there was none).
    """
    errmsg = ''
    provider = "Limetorrent"
    host = lazylibrarian.CONFIG['LIME_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    params = {
        "q": book['searchterm']
    }
    providerurl = url_fix(host + "/searchrss/other")
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        for item in d.entries:
            try:
                title = unaccented(item['title'])
                try:
                    # the seeder count is embedded in the description text
                    seeders = item['description']
                    seeders = int(seeders.split('Seeds:')[1].split(' ,')[0].replace(',', '').strip())
                except (IndexError, ValueError):
                    seeders = 0

                size = item['size']
                try:
                    size = int(size)
                except ValueError:
                    size = 0

                try:
                    pubdate = item['published']
                except KeyError:
                    pubdate = None

                url = None
                for link in item['links']:
                    # 'href' is the documented feedparser key (and what EXTRA
                    # uses); a link without a type or href is skipped instead
                    # of raising and discarding the whole entry
                    href = link.get('href')
                    if href and 'x-bittorrent' in (link.get('type') or ''):
                        url = href

                if not url or not title:
                    logger.debug('No url or title found')
                elif minimumseeders < seeders:
                    res = {
                        'bookid': book['bookid'],
                        'tor_prov': provider,
                        'tor_title': title,
                        'tor_url': url,
                        'tor_size': str(size),
                        'tor_type': 'torrent',
                        'priority': lazylibrarian.CONFIG['LIME_DLPRIORITY']
                    }
                    if pubdate:
                        res['tor_date'] = pubdate
                    results.append(res)
                    logger.debug('Found %s. Size: %s' % (title, size))
                else:
                    logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

            except Exception as e:
                if 'forbidden' in str(e).lower():
                    # may have ip based access limits
                    logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                else:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
def TPB(book=None, test=False):
    """Search the TPB provider's result pages and collect magnet links.

    ``book`` is indexed for 'searchterm' and 'bookid' and may carry a
    'library' key that selects the search category.  Result pages are
    requested one after another while the previous page produced at least
    one accepted result and CONFIG['MAX_PAGES'] (when > 0) is not exceeded.

    With ``test`` set, the success flag of the first page request is
    returned.  Otherwise returns ``(results, errmsg)``.
    """
    errmsg = ''
    provider = "TPB"
    host = lazylibrarian.CONFIG['TPB_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/s/?")

    cat = 0  # 601=ebooks, 102=audiobooks, 0=all, no mag category
    if 'library' in book:
        if book['library'] == 'AudioBook':
            cat = 102
        elif book['library'] == 'eBook':
            cat = 601
        elif book['library'] == 'magazine':
            cat = 0

    sterm = makeUnicode(book['searchterm'])

    page = 0
    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    next_page = True

    while next_page:
        params = {
            "q": book['searchterm'],
            "category": cat,
            "page": page,
            "orderby": "99"
        }
        searchURL = providerurl + "?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)

        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching data from %s: %s' % (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            soup = BeautifulSoup(result, 'html5lib')
            # tpb uses a named table
            table = soup.find('table', id='searchResult')
            rows = table.find_all('tr') if table else []

            if len(rows) > 1:
                rows = rows[1:]  # first row is headers

            for row in rows:
                td = row.find_all('td')
                if len(td) <= 2:
                    continue
                try:
                    anchor = BeautifulSoup(str(td[1]), 'html5lib').find("a")
                    detail_href = anchor.get("href")
                    title = anchor.text
                    size = td[1].text.split(', Size ')[1].split('iB')[0]
                    size = size.replace(' ', '')
                    size = size_in_bytes(size)
                    try:
                        seeders = int(td[2].text.replace(',', ''))
                    except ValueError:
                        seeders = 0

                    if minimumseeders < seeders:
                        # no point in asking for magnet link if not enough seeders
                        magurl = '%s/%s' % (host, detail_href)
                        # Keep the magnet separate from the detail-page href so
                        # a failed fetch cannot leak that href into the results,
                        # and use fresh names so the page-level result/success
                        # are not clobbered.
                        magnet = None
                        detail_html, fetched = fetchURL(magurl)
                        if not fetched:
                            logger.debug('Error fetching url %s, %s' % (magurl, detail_html))
                        else:
                            for link in BeautifulSoup(detail_html, 'html5lib').find_all('a'):
                                output = link.get('href')
                                if output and output.startswith('magnet'):
                                    magnet = output
                                    break

                        if not magnet or not title:
                            logger.debug('Missing magnet or title')
                        else:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider,
                                'tor_title': title,
                                'tor_url': magnet,
                                'tor_size': str(size),
                                'tor_type': 'magnet',
                                'priority': lazylibrarian.CONFIG['TPB_DLPRIORITY']
                            })
                            logger.debug('Found %s. Size: %s' % (title, size))
                            next_page = True
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
def EXTRA(book=None, test=False):
    """Query the Extratorrent RSS search and collect torrent results.

    ``book`` is indexed for 'searchterm' and 'bookid', so it must be supplied.
    With ``test`` set, only the success flag of the feed request is returned.
    Otherwise returns ``(results, errmsg)``.
    """
    errmsg = ''
    provider = "Extratorrent"
    host = lazylibrarian.CONFIG['EXTRA_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host + "/rss")

    query = urlencode({
        "type": "search",
        "s_cat": "2",
        "search": book['searchterm'],
    })
    searchURL = providerurl + "/?" + query
    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        if '404' in data:
            # a 404 is treated as "no results", not as a failure
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1

    entries = []
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        entries = feedparser.parse(data).entries

    for entry in entries:
        try:
            title = unaccented(entry['title'])

            try:
                seeders = int(entry['seeders'].replace(',', ''))
            except ValueError:
                seeders = 0

            try:
                size = int(entry['size'])
            except ValueError:
                size = 0

            # the last link typed as a bittorrent file wins
            torrent_hrefs = [link['href'] for link in entry['links']
                             if 'x-bittorrent' in link['type']]
            url = torrent_hrefs[-1] if torrent_hrefs else None

            if not url or not title:
                logger.debug('No url or title found')
            elif seeders > minimumseeders:
                results.append({
                    'bookid': book['bookid'],
                    'tor_prov': provider,
                    'tor_title': title,
                    'tor_url': url,
                    'tor_size': str(size),
                    'tor_type': 'torrent',
                    'priority': lazylibrarian.CONFIG['EXTRA_DLPRIORITY']
                })
                logger.debug('Found %s. Size: %s' % (title, size))
            else:
                logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

        except Exception as e:
            logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
            logger.debug('%s: %s' % (provider, traceback.format_exc()))

    found = len(results)
    logger.debug("Found %i result%s from %s for %s" % (found, plural(found), provider, sterm))
    return results, errmsg
def KAT(book=None, test=False):
    """Scrape the KAT search page and collect torrent or magnet results.

    ``book`` is indexed for 'searchterm' and 'bookid', so it must be supplied.
    With ``test`` set, only the success flag of the page request is returned.
    Otherwise returns ``(results, errmsg)``.
    """
    errmsg = ''
    provider = "KAT"
    host = lazylibrarian.CONFIG['KAT_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host + "/usearch/" + quote(book['searchterm']))

    query = urlencode({
        "category": "books",
        "field": "seeders",
        "sorder": "desc",
    })
    searchURL = providerurl + "/?" + query
    sterm = makeUnicode(book['searchterm'])

    result, success = fetchURL(searchURL)
    if not success:
        if '404' in result:
            # seems KAT returns 404 if no results, not really an error
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, result))
            errmsg = result
        result = False

    if test:
        return success

    results = []

    if result:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1

        # the results live in the second (un-named) table of the page
        tables = BeautifulSoup(result, 'html5lib').find_all('table')
        rows = []
        if len(tables) > 1 and tables[1]:
            rows = tables[1].find_all('tr')

        if len(rows) > 1:
            rows = rows[1:]  # first row is headers

        for row in rows:
            td = row.find_all('td')
            if len(td) <= 3:
                continue
            try:
                title = unaccented(td[0].text)
                cell_html = str(td[0])

                # kat can return magnet or torrent or both.
                magnet = ''
                try:
                    magnet = 'magnet' + cell_html.split('href="magnet')[1].split('"')[0]
                except IndexError:
                    pass

                url = ''
                try:
                    url = 'http' + cell_html.split('href="http')[1].split('.torrent?')[0] + '.torrent'
                except IndexError:
                    pass

                if url and not (magnet and lazylibrarian.CONFIG['PREFER_MAGNET']):
                    mode = 'torrent'
                else:
                    url = magnet
                    mode = 'magnet'

                try:
                    size = size_in_bytes(str(td[1].text).replace(' ', '').upper())
                except ValueError:
                    size = 0

                try:
                    seeders = int(td[3].text.replace(',', ''))
                except ValueError:
                    seeders = 0

                if not url or not title:
                    logger.debug('Missing url or title')
                elif seeders > minimumseeders:
                    results.append({
                        'bookid': book['bookid'],
                        'tor_prov': provider,
                        'tor_title': title,
                        'tor_url': url,
                        'tor_size': str(size),
                        'tor_type': mode,
                        'priority': lazylibrarian.CONFIG['KAT_DLPRIORITY']
                    })
                    logger.debug('Found %s. Size: %s' % (title, size))
                else:
                    logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
            except Exception as e:
                logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                logger.debug('%s: %s' % (provider, traceback.format_exc()))

    found = len(results)
    logger.debug("Found %i result%s from %s for %s" % (found, plural(found), provider, sterm))
    return results, errmsg
def WWT(book=None, test=False):
    """Scrape WorldWideTorrents search pages for torrent or magnet results.

    ``book`` is indexed for 'searchterm' and 'bookid' and may carry a
    'library' key that selects the search category.  Pages are requested one
    after another while the previous page produced at least one accepted
    result and CONFIG['MAX_PAGES'] (when > 0) is not exceeded.

    With ``test`` set, the success flag of the first page request is
    returned.  Otherwise returns ``(results, errmsg)``.
    """
    errmsg = ''
    provider = "WorldWideTorrents"
    host = lazylibrarian.CONFIG['WWT_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host + "/torrents-search.php")
    sterm = makeUnicode(book['searchterm'])

    cat = 0  # 0=all, 36=ebooks, 52=mags, 56=audiobooks
    if 'library' in book:
        library = book['library']
        if library == 'AudioBook':
            cat = 56
        elif library == 'eBook':
            cat = 36
        elif library == 'magazine':
            cat = 52

    page = 0
    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    next_page = True

    while next_page:
        query = urlencode({
            "search": book['searchterm'],
            "page": page,
            "cat": cat,
        })
        searchURL = providerurl + "/?" + query

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            if '404' in result:
                # might return 404 if no results, not really an error
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            elif '503' in result:
                logger.warn("Cloudflare bot detection? %s: %s" % (provider, result))
                logger.warn("Try unblocking %s from a browser" % providerurl)
                success = True
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching data from %s: %s' % (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))

            # the results live in the third (un-named) table of the page
            tables = BeautifulSoup(result, 'html5lib').find_all('table')
            rows = []
            if len(tables) > 2 and tables[2]:
                rows = tables[2].find_all('tr')

            if len(rows) > 1:
                rows = rows[1:]  # first row is headers

            for row in rows:
                td = row.find_all('td')
                if len(td) <= 3:
                    continue
                try:
                    title = unaccented(td[0].text)
                    cell_html = str(td[0])

                    # can return magnet or torrent or both.
                    magnet = ''
                    try:
                        magnet = 'magnet' + cell_html.split('href="magnet')[1].split('"')[0]
                    except IndexError:
                        pass

                    url = ''
                    try:
                        url = url_fix(host + '/download.php') + \
                            cell_html.split('href="download.php')[1].split('.torrent"')[0] + '.torrent'
                    except IndexError:
                        pass

                    if url and not (magnet and lazylibrarian.CONFIG['PREFER_MAGNET']):
                        mode = 'torrent'
                    else:
                        url = magnet
                        mode = 'magnet'

                    try:
                        size = size_in_bytes(str(td[1].text).replace(' ', '').upper())
                    except ValueError:
                        size = 0

                    try:
                        seeders = int(td[2].text.replace(',', ''))
                    except ValueError:
                        seeders = 0

                    if not url or not title:
                        logger.debug('Missing url or title')
                    elif seeders > minimumseeders:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': mode,
                            'priority': lazylibrarian.CONFIG['WWT_DLPRIORITY']
                        })
                        logger.debug('Found %s. Size: %s' % (title, size))
                        next_page = True
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    found = len(results)
    logger.debug("Found %i result%s from %s for %s" % (found, plural(found), provider, sterm))
    return results, errmsg
def TDL(book=None, test=False):
    """Query the torrentdownloads RSS search and collect magnet results.

    ``book`` is indexed for 'searchterm' and 'bookid', so it must be supplied.
    With ``test`` set, the function stops after the feed request and returns
    that request's success flag.  Otherwise it returns ``(results, errmsg)``:
    a list of result dicts and the fetch error text ('' when there was none).
    """
    errmsg = ''
    provider = "torrentdownloads"
    host = lazylibrarian.CONFIG['TDL_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host)

    query = urlencode({
        "type": "search",
        "cid": "2",
        "search": book['searchterm'],
    })
    searchURL = providerurl + "/rss.xml?" + query
    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        if '404' in data:
            # a 404 is treated as "no results", not as a failure
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1

    entries = []
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        entries = feedparser.parse(data).entries

    for entry in entries:
        try:
            title = entry['title']
            seeders = int(entry['seeders'].replace(',', ''))
            page_link = entry['link']
            size = int(entry['size'])
            try:
                pubdate = entry['published']
            except KeyError:
                pubdate = None

            if not page_link or seeders <= minimumseeders:
                # not worth requesting the magnet page for this entry
                logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                continue

            # the feed link is relative; the magnet is scraped from that page
            url = None
            page_html, fetched = fetchURL(providerurl + page_link)
            if fetched:
                for anchor in BeautifulSoup(page_html, 'html5lib').find_all('a'):
                    href = anchor.get('href')
                    if href and href.startswith('magnet'):
                        url = href
                        break

            if not url or not title:
                logger.debug('Missing url or title')
                continue

            res = {
                'bookid': book['bookid'],
                'tor_prov': provider,
                'tor_title': title,
                'tor_url': url,
                'tor_size': str(size),
                'tor_type': 'magnet',
                'priority': lazylibrarian.CONFIG['TDL_DLPRIORITY']
            }
            if pubdate:
                res['tor_date'] = pubdate
            logger.debug('Found %s. Size: %s' % (title, size))
            results.append(res)

        except Exception as e:
            logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
            logger.debug('%s: %s' % (provider, traceback.format_exc()))

    found = len(results)
    logger.debug("Found %i result%s from %s for %s" % (found, plural(found), provider, sterm))
    return results, errmsg
def GEN(book=None, prov=None, test=False):
    """Scrape a libgen search page and resolve direct download links.

    ``book`` is indexed for 'searchterm' and 'bookid', so it must be supplied.
    ``prov`` is the CONFIG key prefix ('GEN' when None); it selects the
    '<prov>_HOST', '<prov>_SEARCH' and '<prov>_DLPRIORITY' settings.  The
    search script is forced to 'search.php' unless the setting ends in
    '.php' and names index.php or search.php.

    Pages are requested one after another while the previous page yielded a
    row with a link and title and CONFIG['MAX_PAGES'] (when > 0) is not
    exceeded.  With ``test`` set, the success flag of the first page request
    is returned.  Otherwise returns ``(results, errmsg)``.
    """
    errmsg = ''
    provider = "libgen.io"
    if prov is None:
        prov = 'GEN'
    host = lazylibrarian.CONFIG[prov + '_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    search = lazylibrarian.CONFIG[prov + '_SEARCH']
    if not search or not search.endswith('.php'):
        search = 'search.php'
    if 'index.php' not in search and 'search.php' not in search:
        search = 'search.php'
    if search[0] == '/':
        search = search[1:]

    sterm = makeUnicode(book['searchterm'])

    page = 1
    results = []
    next_page = True

    while next_page:
        # the two search scripts take different query parameters
        if 'index.php' in search:
            params = {
                "s": book['searchterm'],
                "f_lang": "All",
                "f_columns": 0,
                "f_ext": "All"
            }
        else:
            params = {
                "view": "simple",
                "open": 0,
                "phrase": 0,
                "column": "def",
                "res": 100,
                "req": book['searchterm']
            }

        if page > 1:
            params['page'] = page

        providerurl = url_fix(host + "/%s" % search)
        searchURL = providerurl + "?%s" % urlencode(params)

        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            elif '111' in result:
                # looks like libgen has ip based access limits
                logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                errmsg = result
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching page data from %s: %s' % (provider, result))
                errmsg = result
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            try:
                soup = BeautifulSoup(result, 'html5lib')
                rows = []
                try:
                    table = soup.find_all('table')[-1]  # un-named table, last one in page
                    if table:
                        rows = table.find_all('tr')
                except IndexError:  # no results table in result page
                    rows = []

                if len(rows) > 1:  # skip table headers
                    rows = rows[1:]

                for row in rows:
                    author = ''
                    title = ''
                    size = ''
                    extn = ''
                    link = ''
                    td = row.find_all('td')

                    if 'index.php' in search and len(td) > 3:
                        try:
                            author = formatAuthorName(td[0].text)
                            title = td[2].text
                            newsoup = BeautifulSoup(str(td[4]), 'html5lib')
                            data = newsoup.find('a')
                            if data:
                                link = data.get('href')
                            # cell text looks like "<extn> (<size>)"
                            extn = td[4].text.split('(')[0].strip()
                            size = td[4].text.split('(')[1].split(')')[0]
                            size = size.upper()
                        except IndexError as e:
                            logger.debug('Error parsing libgen index.php results: %s' % str(e))

                    elif 'search.php' in search and len(td) > 8:
                        try:
                            author = formatAuthorName(td[1].text)
                            title = td[2].text
                            size = td[7].text.upper()
                            extn = td[8].text
                            link = ''
                            newsoup = BeautifulSoup(str(td[2]), 'html5lib')
                            for res in newsoup.find_all('a'):
                                output = res.get('href')
                                # An anchor without href yields None; without
                                # this guard the TypeError would abandon every
                                # remaining row of the page.
                                if output and 'md5' in output:
                                    link = output
                                    break
                        except IndexError as e:
                            logger.debug('Error parsing libgen search.php results; %s' % str(e))

                    size = size_in_bytes(size)

                    if link and title:
                        if author:
                            title = author.strip() + ' ' + title.strip()
                        if extn:
                            title = title + '.' + extn

                        if link.startswith('http'):
                            url = redirect_url(host, link)
                        else:
                            if "/index.php?" in link:
                                link = 'md5' + link.split('md5')[1]

                            if "/ads.php?" in link:
                                url = url_fix(host + "/" + link)
                            else:
                                url = url_fix(host + "/ads.php?" + link)

                        # the download link is scraped from the per-book page
                        bookresult, fetched = fetchURL(url)
                        if not fetched:
                            logger.debug('Error fetching link data from %s: %s' % (provider, bookresult))
                            logger.debug(url)
                            url = None
                        else:
                            url = None
                            try:
                                new_soup = BeautifulSoup(bookresult, 'html5lib')
                                # separate loop name so the error message below
                                # always reports the row's link
                                for anchor in new_soup.find_all('a'):
                                    output = anchor.get('href')
                                    if output:
                                        if output.startswith('http') and '/get.php' in output:
                                            url = output
                                            break
                                        elif '/get.php' in output:
                                            url = '/get.php' + output.split('/get.php')[1]
                                            break
                                        elif '/download/book' in output:
                                            url = '/download/book' + output.split('/download/book')[1]
                                            break
                                if url and not url.startswith('http'):
                                    url = url_fix(host + url)
                                else:
                                    url = redirect_url(host, url)
                            except Exception as e:
                                logger.error('%s parsing bookresult for %s: %s' %
                                             (type(e).__name__, link, str(e)))
                                url = None

                        if url:
                            results.append({
                                'bookid': book['bookid'],
                                'tor_prov': provider + '/' + search,
                                'tor_title': title,
                                'tor_url': url,
                                'tor_size': str(size),
                                'tor_type': 'direct',
                                'priority': lazylibrarian.CONFIG[prov + '_DLPRIORITY']
                            })
                            logger.debug('Found %s, Size %s' % (title, size))
                        # NOTE(review): placement assumed at the "link and
                        # title" level; the collapsed source cannot confirm it
                        next_page = True

            except Exception as e:
                logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
def ZOO(book=None, test=False):
    """Query the zooqle RSS search and collect torrent or magnet results.

    ``book`` is indexed for 'searchterm' and 'bookid', so it must be supplied.
    With ``test`` set, only the success flag of the feed request is returned.
    Otherwise returns ``(results, errmsg)``.
    """
    errmsg = ''
    provider = "zooqle"
    host = lazylibrarian.CONFIG['ZOO_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host + "/search")

    query = urlencode({
        "q": book['searchterm'],
        "category": "books",
        "fmt": "rss",
    })
    searchURL = providerurl + "?" + query
    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        if '404' in data:
            # a 404 is treated as "no results", not as a failure
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1

    entries = []
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        entries = feedparser.parse(data).entries

    for entry in entries:
        try:
            title = unaccented(entry['title'])
            seeders = int(entry['torrent_seeds'].replace(',', ''))
            torrent_link = entry['links'][1]['href']
            size = int(entry['links'][1]['length'])
            magnet = entry['torrent_magneturi']

            # start from the torrent link; switch to the magnet when there is
            # no torrent link or the user prefers magnets
            url = torrent_link if torrent_link else None
            mode = 'torrent'
            if magnet and (not url or lazylibrarian.CONFIG['PREFER_MAGNET']):
                url = magnet
                mode = 'magnet'

            if not url or not title:
                logger.debug('No url or title found')
            elif seeders > minimumseeders:
                results.append({
                    'bookid': book['bookid'],
                    'tor_prov': provider,
                    'tor_title': title,
                    'tor_url': url,
                    'tor_size': str(size),
                    'tor_type': mode,
                    'priority': lazylibrarian.CONFIG['ZOO_DLPRIORITY']
                })
                logger.debug('Found %s. Size: %s' % (title, size))
            else:
                logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

        except Exception as e:
            if 'forbidden' in str(e).lower():
                # looks like zooqle has ip based access limits
                logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
            else:
                logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                logger.debug('%s: %s' % (provider, traceback.format_exc()))

    found = len(results)
    logger.debug("Found %i result%s from %s for %s" % (found, plural(found), provider, sterm))
    return results, errmsg
def LIME(book=None, test=False):
    """Query the Limetorrent RSS search and collect torrent results.

    ``book`` is indexed for 'searchterm' and 'bookid', so it must be supplied.
    With ``test`` set, only the success flag of the feed request is returned.
    Otherwise returns ``(results, errmsg)``: a list of result dicts and the
    fetch error text ('' when there was none).
    """
    errmsg = ''
    provider = "Limetorrent"
    host = lazylibrarian.CONFIG['LIME_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    params = {"q": book['searchterm']}
    providerurl = url_fix(host + "/searchrss/other")
    searchURL = providerurl + "?%s" % urlencode(params)

    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        # may return 404 if no results, not really an error
        if '404' in data:
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []

    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        d = feedparser.parse(data)
        for item in d.entries:
            try:
                title = unaccented(item['title'])
                try:
                    # the seeder count is embedded in the description text
                    seeders = item['description']
                    seeders = int(seeders.split('Seeds:')[1].split(' ,')[0].replace(',', '').strip())
                except (IndexError, ValueError):
                    seeders = 0

                size = item['size']
                try:
                    size = int(size)
                except ValueError:
                    size = 0

                try:
                    pubdate = item['published']
                except KeyError:
                    pubdate = None

                url = None
                for link in item['links']:
                    # 'href' is the documented feedparser key (and what EXTRA
                    # uses); a link without a type or href is skipped instead
                    # of raising and discarding the whole entry
                    href = link.get('href')
                    if href and 'x-bittorrent' in (link.get('type') or ''):
                        url = href

                if not url or not title:
                    logger.debug('No url or title found')
                elif minimumseeders < seeders:
                    res = {
                        'bookid': book['bookid'],
                        'tor_prov': provider,
                        'tor_title': title,
                        'tor_url': url,
                        'tor_size': str(size),
                        'tor_type': 'torrent',
                        'priority': lazylibrarian.CONFIG['LIME_DLPRIORITY']
                    }
                    if pubdate:
                        res['tor_date'] = pubdate
                    results.append(res)
                    logger.debug('Found %s. Size: %s' % (title, size))
                else:
                    logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

            except Exception as e:
                if 'forbidden' in str(e).lower():
                    # may have ip based access limits
                    logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                else:
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))

    return results, errmsg
def EXTRA(book=None, test=False):
    """Query the Extratorrent RSS search and collect torrent results.

    ``book`` is indexed for 'searchterm' and 'bookid', so it must be supplied.
    With ``test`` set, only the success flag of the feed request is returned.
    Otherwise returns ``(results, errmsg)``.
    """
    errmsg = ''
    provider = "Extratorrent"
    host = lazylibrarian.CONFIG['EXTRA_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host + "/rss")

    query = urlencode({
        "type": "search",
        "s_cat": "2",
        "search": book['searchterm'],
    })
    searchURL = providerurl + "/?" + query
    sterm = makeUnicode(book['searchterm'])

    data, success = fetchURL(searchURL)
    if not success:
        if '404' in data:
            # a 404 is treated as "no results", not as a failure
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug('Error fetching data from %s: %s' % (provider, data))
            errmsg = data
        data = False

    if test:
        return success

    results = []
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1

    entries = []
    if data:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        entries = feedparser.parse(data).entries

    for entry in entries:
        try:
            title = unaccented(entry['title'])

            try:
                seeders = int(entry['seeders'].replace(',', ''))
            except ValueError:
                seeders = 0

            try:
                size = int(entry['size'])
            except ValueError:
                size = 0

            # the last link typed as a bittorrent file wins
            torrent_hrefs = [link['href'] for link in entry['links']
                             if 'x-bittorrent' in link['type']]
            url = torrent_hrefs[-1] if torrent_hrefs else None

            if not url or not title:
                logger.debug('No url or title found')
            elif seeders > minimumseeders:
                results.append({
                    'bookid': book['bookid'],
                    'tor_prov': provider,
                    'tor_title': title,
                    'tor_url': url,
                    'tor_size': str(size),
                    'tor_type': 'torrent',
                    'priority': lazylibrarian.CONFIG['EXTRA_DLPRIORITY']
                })
                logger.debug('Found %s. Size: %s' % (title, size))
            else:
                logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))

        except Exception as e:
            logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
            logger.debug('%s: %s' % (provider, traceback.format_exc()))

    found = len(results)
    logger.debug("Found %i result%s from %s for %s" % (found, plural(found), provider, sterm))
    return results, errmsg
def TPB(book=None, test=False):
    """Search TPB for book['searchterm'], following result pages.

    Each accepted search row needs a second request: the search page only
    gives a link to the torrent's detail page, and the magnet link is
    scraped from that page.

    Args:
        book: mapping describing the wanted item; 'searchterm' and 'bookid'
            are read, and the optional 'library' key selects the category.
        test: when true, return the success flag of the first search
            request instead of results.

    Returns:
        The success flag when ``test`` is true (a 404 counts as success).
        Otherwise a tuple ``(results, errmsg)`` where ``results`` is a list
        of dicts with keys bookid, tor_prov, tor_title, tor_url, tor_size,
        tor_type and priority, and ``errmsg`` is the last fetch error text
        or ''.
    """
    errmsg = ''
    provider = "TPB"
    host = lazylibrarian.CONFIG['TPB_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/s/?")

    cat = 0  # 601=ebooks, 102=audiobooks, 0=all, no mag category
    if 'library' in book:
        if book['library'] == 'AudioBook':
            cat = 102
        elif book['library'] == 'eBook':
            cat = 601
        elif book['library'] == 'magazine':
            cat = 0

    sterm = makeUnicode(book['searchterm'])

    page = 0
    results = []
    # a result is accepted when minimumseeders < seeders
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
    next_page = True

    while next_page:
        params = {
            "q": book['searchterm'],
            "category": cat,
            "page": page,
            "orderby": "99"
        }
        searchURL = providerurl + "?%s" % urlencode(params)

        # only ask for another page if this one yields an accepted result
        next_page = False
        result, success = fetchURL(searchURL)

        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching data from %s: %s' % (provider, result))
                errmsg = result
            # on failure `result` holds the error text, never parse it
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            soup = BeautifulSoup(result, 'html5lib')
            # tpb uses a named table
            table = soup.find('table', id='searchResult')
            if table:
                rows = table.find_all('tr')
            else:
                rows = []

            if len(rows) > 1:
                rows = rows[1:]  # first row is headers

            for row in rows:
                td = row.find_all('td')
                if len(td) <= 2:
                    continue
                try:
                    new_soup = BeautifulSoup(str(td[1]), 'html5lib')
                    link = new_soup.find("a")
                    # href of the torrent's detail page, NOT a magnet link
                    detail_href = link.get("href")
                    title = link.text
                    size = td[1].text.split(', Size ')[1].split('iB')[0]
                    # strip plain and non-breaking spaces between number and unit
                    size = size.replace(u'\xa0', '').replace(' ', '')
                    size = size_in_bytes(size)
                    try:
                        seeders = int(td[2].text.replace(',', ''))
                    except ValueError:
                        seeders = 0

                    if minimumseeders < seeders:
                        # no point in asking for magnet link if not enough seeders
                        magurl = '%s/%s' % (host, detail_href)
                        # separate names so the search page's result/success
                        # are not overwritten by the detail-page fetch
                        detail, fetched = fetchURL(magurl)
                        if not fetched:
                            # without the detail page there is no magnet link,
                            # so nothing may be added for this row
                            logger.debug('Error fetching url %s, %s' % (magurl, detail))
                        else:
                            magnet = None
                            detail_soup = BeautifulSoup(detail, 'html5lib')
                            for anchor in detail_soup.find_all('a'):
                                output = anchor.get('href')
                                if output and output.startswith('magnet'):
                                    magnet = output
                                    break

                            if not magnet or not title:
                                logger.debug('Missing magnet or title')
                            else:
                                results.append({
                                    'bookid': book['bookid'],
                                    'tor_prov': provider,
                                    'tor_title': title,
                                    'tor_url': magnet,
                                    'tor_size': str(size),
                                    'tor_type': 'magnet',
                                    'priority': lazylibrarian.CONFIG['TPB_DLPRIORITY']
                                })
                                logger.debug('Found %s. Size: %s' % (title, size))
                                next_page = True
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    # one malformed row should not stop the remaining rows
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        # MAX_PAGES of 0 (or less) means no page limit
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
def WWT(book=None, test=False):
    """Search WorldWideTorrents for book['searchterm'], page by page.

    Args:
        book: mapping describing the wanted item; 'searchterm' and 'bookid'
            are read, and the optional 'library' key selects the category.
        test: when true, return the success flag of the first search
            request instead of results.

    Returns:
        The success flag when ``test`` is true (404 and 503 responses are
        reported as success). Otherwise a tuple ``(results, errmsg)`` where
        ``results`` is a list of dicts with keys bookid, tor_prov,
        tor_title, tor_url, tor_size, tor_type and priority, and ``errmsg``
        is the last fetch error text or ''.
    """
    errmsg = ''
    provider = "WorldWideTorrents"

    host = lazylibrarian.CONFIG['WWT_HOST']
    if not host.startswith('http'):
        host = 'http://' + host
    providerurl = url_fix(host + "/torrents-search.php")

    sterm = makeUnicode(book['searchterm'])

    # site category codes: 0=all, 36=ebooks, 52=mags, 56=audiobooks
    library = book['library'] if 'library' in book else None
    if library == 'AudioBook':
        cat = 56
    elif library == 'eBook':
        cat = 36
    elif library == 'magazine':
        cat = 52
    else:
        cat = 0

    page = 0
    results = []
    # a result is accepted when minimumseeders < seeders
    minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1

    more_pages = True
    while more_pages:
        query = urlencode({"search": book['searchterm'], "page": page, "cat": cat})
        searchURL = providerurl + "/?%s" % query

        # another page is requested only if this one yields an accepted result
        more_pages = False
        result, success = fetchURL(searchURL)
        if not success:
            if '404' in result:
                # might return 404 if no results, not really an error
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            elif '503' in result:
                logger.warn("Cloudflare bot detection? %s: %s" % (provider, result))
                logger.warn("Try unblocking %s from a browser" % providerurl)
                success = True
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching data from %s: %s' % (provider, result))
                errmsg = result
            # on failure `result` holds the error text, never parse it
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            soup = BeautifulSoup(result, 'html5lib')
            rows = []
            try:
                # the results live in the third (un-named) table of the page
                table = soup.find_all('table')[2]
                if table:
                    rows = table.find_all('tr')
            except IndexError:
                # no results table in result page
                rows = []

            if len(rows) > 1:
                rows = rows[1:]  # first row is headers

            for row in rows:
                td = row.find_all('td')
                if len(td) <= 3:
                    continue
                try:
                    title = unaccented(td[0].text)
                    cell_html = str(td[0])

                    # the first cell can carry a magnet link, a torrent link, or both
                    magnet = ''
                    try:
                        magnet = 'magnet' + cell_html.split('href="magnet')[1].split('"')[0]
                    except IndexError:
                        pass

                    url = ''
                    try:
                        url = url_fix(host + '/download.php') + \
                            cell_html.split('href="download.php')[1].split('.torrent"')[0] + '.torrent'
                    except IndexError:
                        pass

                    # fall back to the magnet when there is no torrent link,
                    # or choose it when both exist and magnets are preferred
                    if not url or (magnet and lazylibrarian.CONFIG['PREFER_MAGNET']):
                        url = magnet
                        mode = 'magnet'
                    else:
                        mode = 'torrent'

                    try:
                        size = size_in_bytes(str(td[1].text).replace(' ', '').upper())
                    except ValueError:
                        size = 0

                    try:
                        seeders = int(td[2].text.replace(',', ''))
                    except ValueError:
                        seeders = 0

                    if not url or not title:
                        logger.debug('Missing url or title')
                    elif minimumseeders < seeders:
                        found = {
                            'bookid': book['bookid'],
                            'tor_prov': provider,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': mode,
                            'priority': lazylibrarian.CONFIG['WWT_DLPRIORITY']
                        }
                        results.append(found)
                        logger.debug('Found %s. Size: %s' % (title, size))
                        more_pages = True
                    else:
                        logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
                except Exception as e:
                    # one malformed row should not stop the remaining rows
                    logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                    logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        # MAX_PAGES of 0 (or less) means no page limit
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            more_pages = False

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
def KAT(book=None, test=False):
    """Search KAT's books category for book['searchterm'] (single page).

    The request asks for results sorted by seeders, descending.

    Args:
        book: mapping describing the wanted item; 'searchterm' and 'bookid'
            are the keys read here.
        test: when true, only the search request is made and its success
            flag is returned.

    Returns:
        The success flag when ``test`` is true (a 404 counts as success).
        Otherwise a tuple ``(results, errmsg)`` where ``results`` is a list
        of dicts with keys bookid, tor_prov, tor_title, tor_url, tor_size,
        tor_type and priority, and ``errmsg`` is the fetch error text or ''.
    """
    errmsg = ''
    provider = "KAT"

    host = lazylibrarian.CONFIG['KAT_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    providerurl = url_fix(host + "/usearch/" + quote(book['searchterm']))
    query = urlencode({"category": "books", "field": "seeders", "sorder": "desc"})
    searchURL = providerurl + "/?%s" % query

    sterm = makeUnicode(book['searchterm'])

    result, success = fetchURL(searchURL)
    if not success:
        if '404' in result:
            # seems KAT returns 404 if no results, not really an error
            logger.debug("No results found from %s for %s" % (provider, sterm))
            success = True
        else:
            logger.debug(searchURL)
            logger.debug('Error fetching data from %s: %s' % (provider, result))
            errmsg = result
        # on failure `result` holds the error text, never parse it
        result = False

    if test:
        return success

    results = []

    if result:
        logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
        # a result is accepted when minimumseeders < seeders
        minimumseeders = int(lazylibrarian.CONFIG['NUMBEROFSEEDERS']) - 1
        soup = BeautifulSoup(result, 'html5lib')

        rows = []
        try:
            # the results live in the second (un-named) table of the page
            table = soup.find_all('table')[1]
            if table:
                rows = table.find_all('tr')
        except IndexError:
            # no results table in result page
            rows = []

        if len(rows) > 1:
            rows = rows[1:]  # first row is headers

        for row in rows:
            td = row.find_all('td')
            if len(td) <= 3:
                continue
            try:
                title = unaccented(td[0].text)
                first_cell = str(td[0])

                # kat can return magnet or torrent or both.
                magnet = ''
                try:
                    magnet = 'magnet' + first_cell.split('href="magnet')[1].split('"')[0]
                except IndexError:
                    pass

                url = ''
                try:
                    url = 'http' + first_cell.split('href="http')[1].split('.torrent?')[0] + '.torrent'
                except IndexError:
                    pass

                # fall back to the magnet when there is no torrent link,
                # or choose it when both exist and magnets are preferred
                if not url or (magnet and lazylibrarian.CONFIG['PREFER_MAGNET']):
                    url = magnet
                    mode = 'magnet'
                else:
                    mode = 'torrent'

                try:
                    size = size_in_bytes(str(td[1].text).replace(' ', '').upper())
                except ValueError:
                    size = 0

                try:
                    seeders = int(td[3].text.replace(',', ''))
                except ValueError:
                    seeders = 0

                if not url or not title:
                    logger.debug('Missing url or title')
                elif minimumseeders < seeders:
                    entry = {
                        'bookid': book['bookid'],
                        'tor_prov': provider,
                        'tor_title': title,
                        'tor_url': url,
                        'tor_size': str(size),
                        'tor_type': mode,
                        'priority': lazylibrarian.CONFIG['KAT_DLPRIORITY']
                    }
                    results.append(entry)
                    logger.debug('Found %s. Size: %s' % (title, size))
                else:
                    logger.debug('Found %s but %s seeder%s' % (title, seeders, plural(seeders)))
            except Exception as e:
                # one malformed row should not stop the remaining rows
                logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                logger.debug('%s: %s' % (provider, traceback.format_exc()))

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg
def GEN(book=None, prov=None, test=False):
    """Search a libgen host for book['searchterm'] and resolve download links.

    Two page layouts are handled, chosen by the configured search script:
    'index.php' (foreign fiction) and 'search.php' (non-fiction). For each
    usable row the book's own page is fetched and scanned for a
    '/get.php' or '/download/book' link, which becomes the result url.

    Args:
        book: mapping describing the wanted item; 'searchterm' and 'bookid'
            are the keys read here.
        prov: prefix of the config keys to use (<prov>_HOST, <prov>_SEARCH,
            <prov>_DLPRIORITY); defaults to 'GEN'.
        test: when true, return the success flag of the first search
            request instead of results.

    Returns:
        The success flag when ``test`` is true (a 404 counts as success).
        Otherwise a tuple ``(results, errmsg)`` where ``results`` is a list
        of dicts with keys bookid, tor_prov, tor_title, tor_url, tor_size,
        tor_type ('direct') and priority, and ``errmsg`` is the last fetch
        error text or ''.
    """
    errmsg = ''
    provider = "libgen.io"
    if not prov:
        prov = 'GEN'
    host = lazylibrarian.CONFIG[prov + '_HOST']
    if not host.startswith('http'):
        host = 'http://' + host

    # fall back to search.php unless a recognised php script is configured
    search = lazylibrarian.CONFIG[prov + '_SEARCH']
    if not search or not search.endswith('.php'):
        search = 'search.php'
    if 'index.php' not in search and 'search.php' not in search:
        search = 'search.php'
    if search[0] == '/':
        search = search[1:]

    sterm = makeUnicode(book['searchterm'])
    # same for every page, so build it once
    providerurl = url_fix(host + "/%s" % search)

    page = 1
    results = []
    next_page = True

    while next_page:
        if 'index.php' in search:
            params = {
                "s": book['searchterm'],
                "f_lang": "All",
                "f_columns": 0,
                "f_ext": "All"
            }
        else:
            params = {
                "view": "simple",
                "open": 0,
                "phrase": 0,
                "column": "def",
                "res": 100,
                "req": book['searchterm']
            }

        if page > 1:
            params['page'] = page

        searchURL = providerurl + "?%s" % urlencode(params)

        # only ask for another page if this one yields an accepted result
        next_page = False
        result, success = fetchURL(searchURL)
        if not success:
            # may return 404 if no results, not really an error
            if '404' in result:
                logger.debug("No results found from %s for %s" % (provider, sterm))
                success = True
            elif '111' in result:
                # looks like libgen has ip based access limits
                logger.error('Access forbidden. Please wait a while before trying %s again.' % provider)
                errmsg = result
            else:
                logger.debug(searchURL)
                logger.debug('Error fetching page data from %s: %s' % (provider, result))
                errmsg = result
            # on failure `result` holds the error text, never parse it
            result = False

        if test:
            return success

        if result:
            logger.debug('Parsing results from <a href="%s">%s</a>' % (searchURL, provider))
            try:
                soup = BeautifulSoup(result, 'html5lib')
                rows = []
                try:
                    # the last table with rules=rows
                    table = soup.find_all('table', rules='rows')[-1]
                    if table:
                        rows = table.find_all('tr')
                except IndexError:
                    # no results table in result page
                    rows = []

                if len(rows) > 1:
                    # skip table headers
                    rows = rows[1:]

                for row in rows:
                    author = ''
                    title = ''
                    size = ''
                    extn = ''
                    link = ''
                    td = row.find_all('td')

                    if 'index.php' in search and len(td) > 3:
                        # Foreign fiction
                        try:
                            author = formatAuthorName(td[0].text)
                            title = td[2].text
                            newsoup = BeautifulSoup(str(td[4]), 'html5lib')
                            data = newsoup.find('a')
                            if data:
                                link = data.get('href')
                                # cell text is split as "<extn> (<size>)"
                                extn = td[4].text.split('(')[0].strip()
                                size = td[4].text.split('(')[1].split(')')[0]
                                size = size.upper()
                        except IndexError as e:
                            logger.debug('Error parsing libgen index.php results: %s' % str(e))

                    elif 'search.php' in search and len(td) > 8:
                        # Non-fiction
                        try:
                            author = formatAuthorName(td[1].text)
                            title = td[2].text
                            size = td[7].text.upper()
                            extn = td[8].text
                            link = ''
                            newsoup = BeautifulSoup(str(td[2]), 'html5lib')
                            for res in newsoup.find_all('a'):
                                output = res.get('href')
                                # an anchor without href gives None; skip it
                                # rather than abort the whole results page
                                if output and 'md5' in output:
                                    link = output
                                    break
                        except IndexError as e:
                            logger.debug('Error parsing libgen search.php results; %s' % str(e))

                    size = size_in_bytes(size)

                    if not (link and title):
                        continue

                    if author:
                        title = author.strip() + ' ' + title.strip()
                    if extn:
                        title = title + '.' + extn

                    # work out the address of the book's own page
                    if link.startswith('http'):
                        bookurl = redirect_url(host, link)
                    else:
                        if "/index.php?" in link:
                            link = 'md5' + link.split('md5')[1]

                        if "/ads.php?" in link:
                            bookurl = url_fix(host + "/" + link)
                        else:
                            bookurl = url_fix(host + "/ads.php?" + link)

                    # separate names so the search page's success flag and the
                    # row's link are not overwritten while resolving the book
                    url = None
                    bookresult, fetched = fetchURL(bookurl)
                    if not fetched:
                        logger.debug('Error fetching link data from %s: %s' % (provider, bookresult))
                        logger.debug(bookurl)
                    else:
                        try:
                            new_soup = BeautifulSoup(bookresult, 'html5lib')
                            for anchor in new_soup.find_all('a'):
                                output = anchor.get('href')
                                if not output:
                                    continue
                                if output.startswith('http') and '/get.php' in output:
                                    url = output
                                    break
                                elif '/get.php' in output:
                                    url = '/get.php' + output.split('/get.php')[1]
                                    break
                                elif '/download/book' in output:
                                    url = '/download/book' + output.split('/download/book')[1]
                                    break

                            # only a real url is post-processed; with no
                            # download anchor found it simply stays None
                            if url:
                                if url.startswith('http'):
                                    url = redirect_url(host, url)
                                else:
                                    url = url_fix(host + url)
                        except Exception as e:
                            logger.error('%s parsing bookresult for %s: %s' %
                                         (type(e).__name__, bookurl, str(e)))
                            url = None

                    if url:
                        results.append({
                            'bookid': book['bookid'],
                            'tor_prov': provider + '/' + search,
                            'tor_title': title,
                            'tor_url': url,
                            'tor_size': str(size),
                            'tor_type': 'direct',
                            'priority': lazylibrarian.CONFIG[prov + '_DLPRIORITY']
                        })
                        logger.debug('Found %s, Size %s' % (title, size))
                        next_page = True

            except Exception as e:
                logger.error("An error occurred in the %s parser: %s" % (provider, str(e)))
                logger.debug('%s: %s' % (provider, traceback.format_exc()))

        page += 1
        # MAX_PAGES of 0 (or less) means no page limit
        if 0 < lazylibrarian.CONFIG['MAX_PAGES'] < page:
            logger.warn('Maximum results page search reached, still more results available')
            next_page = False

    logger.debug("Found %i result%s from %s for %s" % (len(results), plural(len(results)), provider, sterm))
    return results, errmsg