# Imports assumed from the surrounding calibre code base; they are not part of
# the original excerpt. guess_extension is taken from the standard library
# here, though the original may use an equivalent calibre helper.
from contextlib import closing
from mimetypes import guess_extension
from urllib.parse import urljoin

from lxml import etree

from calibre import browser
from calibre.gui2.store.search_result import SearchResult
from calibre.utils.opensearch.description import Description
from calibre.utils.opensearch.query import Query


def open_search(url, query, max_results=10, timeout=60):
    # Build the search URL from the catalogue's OpenSearch description document
    description = Description(url)
    url_template = description.get_best_template()
    if not url_template:
        return
    oquery = Query(url_template)

    # set up initial values
    oquery.searchTerms = query
    oquery.count = max_results
    url = oquery.url()

    counter = max_results
    br = browser()
    with closing(br.open(url, timeout=timeout)) as f:
        doc = etree.fromstring(f.read())
        for data in doc.xpath('//*[local-name() = "entry"]'):
            if counter <= 0:
                break
            counter -= 1

            s = SearchResult()

            s.detail_item = ''.join(data.xpath('./*[local-name() = "id"]/text()')).strip()

            # Map OPDS link relations to cover, detail page and downloads
            for link in data.xpath('./*[local-name() = "link"]'):
                rel = link.get('rel')
                href = link.get('href')
                type = link.get('type')

                if rel and href and type:
                    if 'http://opds-spec.org/thumbnail' in rel:
                        s.cover_url = href
                    elif 'http://opds-spec.org/image/thumbnail' in rel:
                        s.cover_url = href
                    elif 'http://opds-spec.org/acquisition/buy' in rel:
                        s.detail_item = href
                    elif 'http://opds-spec.org/acquisition/sample' in rel:
                        pass
                    elif 'http://opds-spec.org/acquisition' in rel:
                        if type:
                            ext = guess_extension(type)
                            if ext:
                                ext = ext[1:].upper().strip()
                                s.downloads[ext] = href
            s.formats = ', '.join(s.downloads.keys()).strip()

            s.title = ' '.join(data.xpath('./*[local-name() = "title"]//text()')).strip()
            s.author = ', '.join(data.xpath(
                './*[local-name() = "author"]//*[local-name() = "name"]//text()')).strip()

            price_e = data.xpath('.//*[local-name() = "price"][1]')
            if price_e:
                price_e = price_e[0]
                currency_code = price_e.get('currencycode', '')
                price = ''.join(price_e.xpath('.//text()')).strip()
                s.price = currency_code + ' ' + price
                s.price = s.price.strip()

            yield s
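# Usage sketch, not part of the original excerpt: open_search() is a generator,
# so callers simply iterate over it. The catalogue URL and query below are
# hypothetical placeholders.
def _demo_open_search():
    for result in open_search('https://example.com/opds/opensearch.xml',
                              'dickens', max_results=5):
        print(result.title, '-', result.author, '[%s]' % result.formats)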
def parse_book(data, base_url):
    s = SearchResult()

    s.detail_item = ''.join(data.xpath(
        './*[local-name() = "id"]/text()')).strip()

    for link in data.xpath('./*[local-name() = "link"]'):
        rel = link.get('rel')
        href = link.get('href')
        type = link.get('type')

        if rel and href and type:
            link_url = urljoin(base_url, href)
            if 'http://opds-spec.org/thumbnail' in rel:
                s.cover_url = link_url
            elif 'http://opds-spec.org/image/thumbnail' in rel:
                s.cover_url = link_url
            elif 'http://opds-spec.org/acquisition/buy' in rel:
                s.detail_item = link_url
            elif 'http://opds-spec.org/acquisition/sample' in rel:
                pass
            elif 'http://opds-spec.org/acquisition' in rel:
                if type:
                    ext = guess_extension(type)
                    if ext:
                        ext = ext[1:].upper().strip()
                        s.downloads[ext] = link_url
    s.formats = ', '.join(s.downloads.keys()).strip()

    s.title = ' '.join(data.xpath(
        './*[local-name() = "title"]//text()')).strip()
    s.author = ', '.join(data.xpath(
        './*[local-name() = "author"]//*[local-name() = "name"]//text()')).strip()

    price_e = data.xpath('.//*[local-name() = "price"][1]')
    if price_e:
        price_e = price_e[0]
        currency_code = price_e.get('currencycode', '')
        price = ''.join(price_e.xpath('.//text()')).strip()
        s.price = currency_code + ' ' + price
        s.price = s.price.strip()

    return s
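# Usage sketch, not part of the original excerpt: parse_book() takes a single
# Atom <entry> element plus the URL the feed was fetched from, so that relative
# links can be resolved against it. The feed URL is a hypothetical placeholder.
def _demo_parse_book(feed_url='https://example.com/opds/new', timeout=60):
    br = browser()
    with closing(br.open(feed_url, timeout=timeout)) as f:
        doc = etree.fromstring(f.read())
    return [parse_book(entry, feed_url)
            for entry in doc.xpath('//*[local-name() = "entry"]')]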
def search_flibusta(url, query, web_url, max_results=10, timeout=60):
    description = Description(url)
    url_template = description.get_best_template()
    if not url_template:
        return
    oquery = Query(url_template)

    # set up initial values
    oquery.searchTerms = query
    oquery.count = max_results
    url = oquery.url()

    counter = max_results
    br = browser()
    with closing(br.open(url, timeout=timeout)) as f:
        doc = etree.fromstring(f.read())
        for data in doc.xpath('//*[local-name() = "entry"]'):
            if counter <= 0:
                break
            counter -= 1

            s = SearchResult()

            s.detail_item = ''.join(
                data.xpath('./*[local-name() = "id"]/text()')).strip()

            for link in data.xpath('./*[local-name() = "link"]'):
                rel = link.get('rel')
                href = link.get('href')
                type = link.get('type')

                if rel and href and type:
                    if 'http://opds-spec.org/thumbnail' in rel:
                        s.cover_url = web_url + href
                    elif 'http://opds-spec.org/image/thumbnail' in rel:
                        s.cover_url = web_url + href
                    elif 'http://opds-spec.org/acquisition/buy' in rel:
                        s.detail_item = web_url + href
                    elif 'http://opds-spec.org/acquisition/sample' in rel:
                        pass
                    elif 'http://opds-spec.org/acquisition/open-access' in rel:
                        if 'application/fb2+zip' in type:
                            s.downloads['FB2'] = web_url + href
                        elif 'application/txt+zip' in type:
                            s.downloads['TXT'] = web_url + href
                        elif 'application/html+zip' in type:
                            s.downloads['HTML'] = web_url + href
                        elif 'application/x-mobipocket-ebook' in type:
                            s.downloads['MOBI'] = web_url + href
                        elif type:
                            ext = guess_extension(type)
                            ext2 = guess_extension(type.replace("+zip", ""))
                            if ext:
                                ext = ext[1:].upper().strip()
                                s.downloads[ext] = web_url + href
                            elif ext2:
                                ext2 = ext2[1:].upper().strip()
                                s.downloads[ext2] = web_url + href
            s.formats = ', '.join(s.downloads.keys()).strip()

            s.title = ' '.join(
                data.xpath('./*[local-name() = "title"]//text()')).strip()
            s.author = ', '.join(
                data.xpath(
                    './*[local-name() = "author"]//*[local-name() = "name"]//text()'
                )).strip()

            s.price = '$0.00'
            s.drm = SearchResult.DRM_UNLOCKED

            yield s
def search(self, query, max_results=10, timeout=60):
    description = Description(self.open_search_url)
    url_template = description.get_best_template()
    if not url_template:
        return
    oquery = Query(url_template)

    # set up initial values
    oquery.searchTerms = query
    oquery.count = max_results
    url = oquery.url()

    counter = max_results
    br = self.create_browser()
    while url is not None and counter > 0:
        with closing(br.open(url, timeout=timeout)) as f:
            s = f.read()
            doc = etree.fromstring(s)

            # Follow the feed's rel="next" link to page through the results
            url = None
            for link in doc.xpath('//*[local-name() = "link"]'):
                rel = link.get('rel')
                href = link.get('href')
                type = link.get('type')

                if rel and href and type:
                    if rel == 'next' and type == 'application/atom+xml':
                        if href[0] == "/":
                            href = self.base_url + href
                        url = href

            for data in doc.xpath('//*[local-name() = "entry"]'):
                if counter <= 0:
                    break
                counter -= 1

                s = SearchResult()

                s.detail_item = ''.join(data.xpath(
                    './*[local-name() = "id"]/text()')).strip()

                drm = False
                for link in data.xpath('./*[local-name() = "link"]'):
                    rel = link.get('rel')
                    href = link.get('href')
                    type = link.get('type')

                    if rel and href and type:
                        if 'http://opds-spec.org/thumbnail' in rel:
                            s.cover_url = href
                        elif 'http://opds-spec.org/image/thumbnail' in rel:
                            s.cover_url = href
                        elif 'http://opds-spec.org/acquisition/buy' in rel:
                            s.detail_item = href
                        elif 'http://opds-spec.org/acquisition' in rel:
                            if type:
                                ext = guess_extension(type)
                                if type == 'application/fb2+xml':
                                    ext = '.fb2'
                                if ext:
                                    ext = ext[1:].upper().strip()
                                    if href[0] == "/":
                                        href = self.base_url + href
                                    s.downloads[ext] = href
                    # Any encryption_method child marks the entry as DRM-locked
                    for enc in link.xpath('./*[local-name() = "encryption_method"]'):
                        drm = True
                s.formats = ', '.join(s.downloads.keys()).strip()

                s.title = ' '.join(data.xpath(
                    './*[local-name() = "title"]//text()')).strip()
                s.author = ', '.join(data.xpath(
                    './*[local-name() = "author"]//*[local-name() = "name"]//text()')).strip()
                s.drm = SearchResult.DRM_LOCKED if drm else SearchResult.DRM_UNLOCKED

                price_e = data.xpath('.//*[local-name() = "price"][1]')
                if price_e:
                    price_e = price_e[0]
                    currency_code = price_e.get('currencycode', '')
                    price = ''.join(price_e.xpath('.//text()')).strip()
                    s.price = currency_code + ' ' + price
                    s.price = s.price.strip()

                if s.cover_url:
                    s.cover_bak = s.cover_url
                    s.cover_url = None

                yield s
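# Sketch of the instance attributes search() relies on; not part of the
# original excerpt. Real calibre store plugins derive from StorePlugin, but a
# plain class with hypothetical URLs is enough to show the expected contract:
# open_search_url, base_url and a create_browser() factory.
class _DemoOPDSStore:
    open_search_url = 'https://example.com/opds/opensearch.xml'
    base_url = 'https://example.com'

    def create_browser(self):
        return browser()

    # Bind the module-level generator above as this class's search() method.
    search = search


def _demo_store_search():
    return list(_DemoOPDSStore().search('dickens', max_results=3))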