Example #1
def open_search(url, query, max_results=10, timeout=60):
    description = Description(url)
    url_template = description.get_best_template()
    if not url_template:
        return
    oquery = Query(url_template)

    # set up initial values
    oquery.searchTerms = query
    oquery.count = max_results
    url = oquery.url()

    counter = max_results
    br = browser()
    with closing(br.open(url, timeout=timeout)) as f:
        doc = etree.fromstring(f.read())
        for data in doc.xpath('//*[local-name() = "entry"]'):
            if counter <= 0:
                break

            counter -= 1

            s = SearchResult()

            s.detail_item = ''.join(data.xpath('./*[local-name() = "id"]/text()')).strip()

            for link in data.xpath('./*[local-name() = "link"]'):
                rel = link.get('rel')
                href = link.get('href')
                type = link.get('type')

                if rel and href and type:
                    if 'http://opds-spec.org/thumbnail' in rel:
                        s.cover_url = href
                    elif 'http://opds-spec.org/image/thumbnail' in rel:
                        s.cover_url = href
                    elif 'http://opds-spec.org/acquisition/buy' in rel:
                        s.detail_item = href
                    elif 'http://opds-spec.org/acquisition/sample' in rel:
                        pass
                    elif 'http://opds-spec.org/acquisition' in rel:
                        if type:
                            ext = guess_extension(type)
                            if ext:
                                ext = ext[1:].upper().strip()
                                s.downloads[ext] = href
            s.formats = ', '.join(s.downloads.keys()).strip()

            s.title = ' '.join(data.xpath('./*[local-name() = "title"]//text()')).strip()
            s.author = ', '.join(data.xpath('./*[local-name() = "author"]//*[local-name() = "name"]//text()')).strip()

            price_e = data.xpath('.//*[local-name() = "price"][1]')
            if price_e:
                price_e = price_e[0]
                currency_code = price_e.get('currencycode', '')
                price = ''.join(price_e.xpath('.//text()')).strip()
                s.price = currency_code + ' ' + price
                s.price = s.price.strip()

            yield s
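
Note: each example in this collection is a fragment (a module-level helper or a store-plugin method) lifted out of its file, so the imports it relies on are not shown. As a rough sketch of what Example #1 assumes, under the assumption that it comes from calibre's store framework and uses lxml (the module paths below are guesses and should be checked against the real plugin):

    from contextlib import closing
    from mimetypes import guess_extension  # assumed source of guess_extension

    from lxml import etree

    from calibre import browser  # assumed: calibre's browser() factory
    from calibre.gui2.store.search_result import SearchResult  # assumed path
    from calibre.utils.opensearch.description import Description  # assumed path
    from calibre.utils.opensearch.query import Query  # assumed path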
Example #2
    def search(self, query, max_results=10, timeout=60):
        # check for cyrillic symbols before performing search
        uquery = unicode(query.strip(), 'utf-8')
        reObj = re.search(u'^[а-яА-Я\\d\\s]{2,}$', uquery)
        if not reObj:
            return

        base_url = 'http://e-knigi.net'
        url = base_url + '/virtuemart?page=shop.browse&search_category=0&search_limiter=anywhere&keyword=' + urllib2.quote(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())

            # if the store finds only one product, it opens the detail view directly
            for data in doc.xpath('//div[@class="prod_details"]'):
                s = SearchResult()
                s.cover_url = ''.join(data.xpath('.//div[@class="vm_main_info clearfix"]/div[@class="lf"]/a/img/@src')).strip()
                s.title = ''.join(data.xpath('.//div[@class="vm_main_info clearfix"]/div[@class="lf"]/a/img/@alt')).strip()
                s.author = ''.join(data.xpath('.//div[@class="td_bg clearfix"]/div[@class="gk_product_tab"]/div/table/tr[3]/td[2]/text()')).strip()
                s.price = ''.join(data.xpath('.//span[@class="productPrice"]/text()')).strip()
                s.detail_item = url
                s.drm = SearchResult.DRM_UNLOCKED

                yield s
                return

            # search in store results
            for data in doc.xpath('//div[@class="browseProductContainer"]'):
                if counter <= 0:
                    break
                id = ''.join(data.xpath('.//a[1]/@href')).strip()
                if not id:
                    continue

                title = ''.join(data.xpath('.//a[@class="gk_vm_product_image"]/img/@title')).strip()
                author = ''.join(data.xpath('.//div[@style="float:left;width:90%"]/b/text()')).strip().replace('Автор: ', '')

                if title.lower().find(query.lower()) == -1 and author.lower().find(query.lower()) == -1:
                    continue

                counter -= 1

                s = SearchResult()
                s.cover_url = ''.join(data.xpath('.//a[@class="gk_vm_product_image"]/img/@src')).strip()
                s.title = title
                s.author = author
                s.price = ''.join(data.xpath('.//span[@class="productPrice"]/text()')).strip()
                s.detail_item = base_url + id
                s.drm = SearchResult.DRM_UNLOCKED

                yield s
Example #3
    def search(self, query, max_results=20, timeout=60):

        br = browser()

        counter = max_results
        page = 1
        while counter:
            with closing(
                br.open(
                    "http://www.publio.pl/e-booki,strona" + str(page) + ".html?q=" + urllib.quote(query),
                    timeout=timeout,
                )
            ) as f:
                doc = html.fromstring(f.read())
                for data in doc.xpath('//div[@class="item"]'):
                    if counter <= 0:
                        break

                    id = "".join(data.xpath('.//div[@class="img"]/a/@href'))
                    if not id:
                        continue

                    cover_url = "".join(data.xpath('.//div[@class="img"]/a/img/@data-original'))
                    title = "".join(data.xpath('.//div[@class="img"]/a/@title'))
                    title2 = "".join(data.xpath('.//div[@class="desc"]/h5//text()'))
                    if title2:
                        title = title + ". " + title2
                    if (
                        "".join(
                            data.xpath('./div[@class="desc"]/div[@class="detailShortList"]/div[last()]/span/text()')
                        ).strip()
                        == "Seria:"
                    ):
                        series = "".join(
                            data.xpath('./div[@class="desc"]/div[@class="detailShortList"]/div[last()]/a/@title')
                        )
                        title = title + " (seria " + series + ")"
                    author = ", ".join(
                        data.xpath('./div[@class="desc"]/div[@class="detailShortList"]/div[@class="row"][1]/a/@title')
                    )
                    price = "".join(data.xpath('.//div[@class="priceBox tk-museo-slab"]/ins/text()'))
                    if not price:
                        price = "".join(data.xpath('.//div[@class="priceBox tk-museo-slab"]/text()')).strip()
                    formats = ", ".join(data.xpath('.//div[@class="formats"]/a/img/@alt'))

                    counter -= 1

                    s = SearchResult()
                    s.cover_url = "http://www.publio.pl" + cover_url
                    s.title = title.strip()
                    s.author = author
                    s.price = price
                    s.detail_item = "http://www.publio.pl" + id.strip()
                    s.drm = SearchResult.DRM_LOCKED if "DRM" in formats else SearchResult.DRM_UNLOCKED
                    s.formats = formats.replace(" DRM", "").strip()

                    yield s
                if not doc.xpath('boolean(//a[@class="next"])'):
                    break
                page += 1
Example #4
    def search(self, query, max_results=10, timeout=60):
        url = ('http://www.ebook.nl/store/advanced_search_result.php?keywords=' + urllib2.quote(query))
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@id="books"]/div[@itemtype="http://schema.org/Book"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('./meta[@itemprop="url"]/@content')).strip()
                if not id:
                    continue
                cover_url = 'http://www.ebook.nl/store/' + ''.join(data.xpath('.//img[@itemprop="image"]/@src'))
                title = ''.join(data.xpath('./span[@itemprop="name"]/a/text()')).strip()
                author = ''.join(data.xpath('./span[@itemprop="author"]/a/text()')).strip()
                if author == '&nbsp':
                    author = ''
                price = ''.join(data.xpath('.//span[@itemprop="price"]//text()'))
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.drm = SearchResult.DRM_UNKNOWN
                s.detail_item = id

                yield s
Example #5
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.ebookshoppe.com/search.php?search_query=' + quote(query)
        br = browser()
        br.addheaders = [("Referer", "http://www.ebookshoppe.com/")]

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//ul[@class="ProductList"]/li'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('./div[@class="ProductDetails"]/'
                                        'strong/a/@href')).strip()
                if not id:
                    continue
                cover_url = ''.join(data.xpath('./div[@class="ProductImage"]/a/img/@src'))
                title = ''.join(data.xpath('./div[@class="ProductDetails"]/strong/a/text()'))
                price = ''.join(data.xpath('./div[@class="ProductPriceRating"]/em/text()'))
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.price = price
                s.drm = SearchResult.DRM_UNLOCKED
                s.detail_item = id

                self.get_author_and_formats(s, timeout)
                if not s.author:
                    continue

                yield s
Example #6
    def search(self, query, max_results=20, timeout=60):
        url = 'http://www.escapemagazine.pl/wyszukiwarka?query=' + urllib.quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="item item_short"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//h2[@class="title"]/a[1]/@href'))
                if not id:
                    continue

                title = ''.join(data.xpath('.//h2[@class="title"]/a[1]/text()'))
                author = ''.join(data.xpath('.//div[@class="author"]/text()'))
                price = ''.join(data.xpath('.//span[@class="price_now"]/strong/text()')) + ' zł'
                cover_url = ''.join(data.xpath('.//img[@class="cover"]/@src'))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = 'http://www.escapemagazine.pl' + id.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = 'PDF'

                yield s
Example #7
    def search(self, query, max_results=10, timeout=60):
        url = ('http://www.whsmith.co.uk/search?keywordCategoryId=wc_dept_ebooks&results=60'
               '&page=1&keywords=' + urllib2.quote(query))

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//li[@class="product"]'):
                if counter <= 0:
                    break
                id_ = ''.join(data.xpath('./a[@class="product_image_wrap"]/@href'))
                if not id_:
                    continue
                id_ = 'http://www.whsmith.co.uk' + id_
                cover_url = ''.join(data.xpath('.//img[@class="product_image"]/@src'))
                title = ''.join(data.xpath('.//h4[@class="product_title"]/text()'))
                author = ', '.join(data.xpath('.//span[@class="product_second"]/text()'))
                price = ''.join(data.xpath('.//span[@class="price"]/text()'))
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.drm = SearchResult.DRM_LOCKED
                s.detail_item = id_
                s.formats = 'ePub'

                yield s
Example #8
    def search(self, query, max_results=10, timeout=60):
        url = 'http://ebooks.eharlequin.com/BANGSearch.dll?Type=FullText&FullTextField=All&FullTextCriteria=' + urllib2.quote(query)
        
        br = browser()
        
        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//table[not(.//@class="sidelink")]/tr[.//ul[@id="details"]]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//ul[@id="details"]/li[@id="title-results"]/a/@href'))
                if not id:
                    continue

                title = ''.join(data.xpath('.//ul[@id="details"]/li[@id="title-results"]/a/text()'))
                author = ''.join(data.xpath('.//ul[@id="details"]/li[@id="author"][1]//a/text()'))
                price = ''.join(data.xpath('.//div[@class="ourprice"]/font/text()'))
                cover_url = ''.join(data.xpath('.//a[@href="%s"]/img/@src' % id))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = 'http://ebooks.eharlequin.com/' + id.strip()
                s.formats = 'EPUB'
                
                yield s
Example #9
    def search(self, query, max_results=10, timeout=60):
        url = self.search_url + query.encode('ascii', 'backslashreplace').replace('%', '%25').replace('\\x', '%').replace(' ', '+')
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())#.decode('latin-1', 'replace'))

            data_xpath = '//div[contains(@class, "prod")]'
            format_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and not(contains(@class, "bld"))]/text()'
            asin_xpath = '@name'
            cover_xpath = './/img[@class="productImage"]/@src'
            title_xpath = './/h3[@class="newaps"]/a//text()'
            author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()'
            price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()'

            for data in doc.xpath(data_xpath):
                if counter <= 0:
                    break

                # Even though we are searching digital-text only, Amazon will still
                # put in results for non-Kindle books (author pages). So we need
                # to explicitly check whether the item is a Kindle book and ignore
                # it if it isn't.
                format_ = ''.join(data.xpath(format_xpath))
                if 'kindle' not in format_.lower():
                    continue

                # We must have an asin otherwise we can't easily reference the
                # book later.
                asin = data.xpath(asin_xpath)
                if asin:
                    asin = asin[0]
                else:
                    continue

                cover_url = ''.join(data.xpath(cover_xpath))

                title = ''.join(data.xpath(title_xpath))
                author = ''.join(data.xpath(author_xpath))
                try:
                    if self.author_article:
                        author = author.split(self.author_article, 1)[1].split(" (")[0]
                except:
                    pass

                price = ''.join(data.xpath(price_xpath))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url.strip()
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = asin.strip()
                s.drm = SearchResult.DRM_UNKNOWN
                s.formats = 'Kindle'

                yield s
Example #10
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.bewrite.net/mm5/merchant.mvc?Search_Code=B&Screen=SRCH&Search=' + urllib2.quote(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@id="content"]//table/tr[position() > 1]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//a/@href'))
                if not id:
                    continue

                heading = ''.join(data.xpath('./td[2]//text()'))
                title, q, author = heading.partition('by ')
                cover_url = ''
                price = ''

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url.strip()
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = id.strip()
                s.drm = SearchResult.DRM_UNLOCKED

                yield s
Example #11
    def search(self, query, max_results=10, timeout=60):
        url = "http://www.legimi.com/pl/ebooki/?szukaj=" + urllib.quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@id="listBooks"]/div'):
                if counter <= 0:
                    break

                id = "".join(data.xpath('.//a[@class="plainLink"]/@href'))
                if not id:
                    continue

                cover_url = "".join(data.xpath(".//img[1]/@src"))
                title = "".join(data.xpath('.//span[@class="bookListTitle ellipsis"]/text()'))
                author = "".join(data.xpath('.//span[@class="bookListAuthor ellipsis"]/text()'))
                price = "".join(data.xpath('.//div[@class="bookListPrice"]/span/text()'))

                counter -= 1

                s = SearchResult()
                s.cover_url = "http://www.legimi.com/" + cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = "http://www.legimi.com/" + id.strip()

                yield s
Example #12
    def search(self, query, max_results=10, timeout=60):
        url = 'http://bookoteka.pl/list?search=' + urllib.quote_plus(query) + '&cat=1&hp=1&type=1'

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//li[@class="EBOOK"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//a[@class="item_link"]/@href'))
                if not id:
                    continue

                cover_url = ''.join(data.xpath('.//a[@class="item_link"]/img/@src'))
                title = ''.join(data.xpath('.//div[@class="shelf_title"]/a/text()'))
                author = ''.join(data.xpath('.//div[@class="shelf_authors"][1]/text()'))
                price = ''.join(data.xpath('.//span[@class="EBOOK"]/text()'))
                price = price.replace('.', ',')
                formats = ', '.join(data.xpath('.//a[@class="fancybox protected"]/text()'))

                counter -= 1

                s = SearchResult()
                s.cover_url = 'http://bookoteka.pl' + cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = 'http://bookoteka.pl' + id.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = formats.strip()

                yield s
Example #13
    def search(self, query, max_results=10, timeout=60):
        url = 'http://m.gutenberg.org/ebooks/search.mobile/?default_prefix=all&sort_order=title&query=' + urllib.quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//ol[@class="results"]/li[@class="booklink"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('./a/@href'))
                id = id.split('.mobile')[0]

                title = ''.join(data.xpath('.//span[@class="title"]/text()'))
                author = ''.join(data.xpath('.//span[@class="subtitle"]/text()'))

                counter -= 1

                s = SearchResult()
                s.cover_url = ''

                s.detail_item = id.strip()
                s.title = title.strip()
                s.author = author.strip()
                s.price = '$0.00'
                s.drm = SearchResult.DRM_UNLOCKED

                yield s
Example #14
    def search(self, query, max_results=10, timeout=60):
        url = 'https://www.beam-shop.de/search?saltFieldLimitation=all&sSearch=' + urllib2.quote(query)
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[contains(@class, "product--box")]'):
                if counter <= 0:
                    break

                id_ = ''.join(data.xpath('./div/div[contains(@class, "product--info")]/a/@href')).strip()
                if not id_:
                    continue
                cover_url = ''.join(data.xpath('./div/div[contains(@class, "product--info")]/a//img/@srcset'))
                if cover_url:
                    cover_url = cover_url.split(',')[0].strip()
                author = data.xpath('.//a[@class="product--author"]/text()')[0].strip()
                title = data.xpath('.//a[@class="product--title"]/text()')[0].strip()
                price = data.xpath('.//div[@class="product--price"]/span/text()')[0].strip()
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.drm = SearchResult.DRM_UNLOCKED
                s.detail_item = id_
#                 s.formats = None
                yield s
Example #15
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.barnesandnoble.com/s/%s?keyword=%s&store=ebook' % (query.replace(' ', '-'), urllib.quote_plus(query))

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//ul[contains(@class, "result-set")]/li[contains(@class, "result")]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//div[contains(@class, "image-bounding-box")]/a/@href'))
                if not id:
                    continue

                cover_url = ''.join(data.xpath('.//img[contains(@class, "product-image")]/@src'))

                title = ''.join(data.xpath('.//a[@class="title"]//text()'))
                author = ', '.join(data.xpath('.//a[@class="contributor"]//text()'))
                price = ''.join(data.xpath('.//div[@class="price-format"]//span[contains(@class, "price")]/text()'))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = id.strip()
                s.drm = SearchResult.DRM_UNKNOWN
                s.formats = 'Nook'

                yield s
Example #16
    def search(self, query, max_results=10, timeout=60):
        base_url = 'https://www.millsandboon.co.uk'
        url = base_url + '/search.aspx??format=ebook&searchText=' + urllib2.quote(query)
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//article[contains(@class, "group")]'):
                if counter <= 0:
                    break
                id_ = ''.join(data.xpath('.//div[@class="img-wrapper"]/a/@href')).strip()
                if not id_:
                    continue

                cover_url = ''.join(data.xpath('.//div[@class="img-wrapper"]/a/img/@src'))
                title = ''.join(data.xpath('.//div[@class="img-wrapper"]/a/img/@alt')).strip()
                author = ''.join(data.xpath('.//a[@class="author"]/text()'))
                price = ''.join(data.xpath('.//div[@class="type-wrapper"]/ul/li[child::span[text()="eBook"]]/a/text()'))
                format_ = ''.join(data.xpath('.//p[@class="doc-meta-format"]/span[last()]/text()'))
                drm = SearchResult.DRM_LOCKED

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = id_
                s.drm = drm
                s.formats = format_

                yield s
Example #17
    def search(self, query, max_results=25, timeout=60):
        url = 'http://ebookpoint.pl/search?qa=&szukaj=' + quote_plus(
            query.decode('utf-8').encode('iso-8859-2')) + '&serwisyall=0&wprzyg=0&wsprzed=1&wyczerp=0&formaty=em-p'

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//ul[@class="list"]/li'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('./a/@href'))
                if not id:
                    continue

                formats = ', '.join(data.xpath('.//ul[@class="book-type book-type-points"]//span[@class="popup"]/span/text()'))
                cover_url = ''.join(data.xpath('.//p[@class="cover"]/img/@data-src'))
                title = ''.join(data.xpath('.//div[@class="book-info"]/h3/a/text()'))
                author = ''.join(data.xpath('.//p[@class="author"]//text()'))
                price = ''.join(data.xpath('.//p[@class="price price-incart"]/a/ins/text()|.//p[@class="price price-add"]/a/text()'))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = re.sub(r'\.', ',', price)
                s.detail_item = id.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = formats.upper()

                yield s
Example #18
    def search(self, query, max_results=15, timeout=60):
        search_url = self.shop_url + '/webservice/webservice.asmx/SearchWebService?'\
                    'searchText=%s&searchContext=ebook' % urllib2.quote(query)
        search_urls = [ search_url ]

        # add this as the first try if it looks like an Ozon ID
        if re.match(r'^\d{6,9}$', query):
            ozon_detail = self.shop_url + '/webservices/OzonWebSvc.asmx/ItemDetail?ID=%s' % query
            search_urls.insert(0, ozon_detail)

        xp_template = 'normalize-space(./*[local-name() = "{0}"]/text())'
        counter = max_results
        br = browser()

        for url in search_urls:
            with closing(br.open(url, timeout=timeout)) as f:
                raw = xml_to_unicode(f.read(), strip_encoding_pats=True, assume_utf8=True)[0]
                doc = etree.fromstring(raw)
                for data in doc.xpath('//*[local-name()="SearchItems" or local-name()="ItemDetail"]'):
                    if counter <= 0:
                        break
                    counter -= 1

                    s = SearchResult()
                    s.detail_item = data.xpath(xp_template.format('ID'))
                    s.title = data.xpath(xp_template.format('Name'))
                    s.author = data.xpath(xp_template.format('Author'))
                    s.price = data.xpath(xp_template.format('Price'))
                    s.cover_url = data.xpath(xp_template.format('Picture'))
                    s.price = format_price_in_RUR(s.price)
                    yield s
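
A note on the xp_template idiom above: wrapping the expression in normalize-space() makes lxml's xpath() return a single, whitespace-trimmed string rather than a list of nodes, which is why the results are assigned directly without ''.join(...). A tiny standalone illustration (not part of the plugin):

    from lxml import etree

    xp_template = 'normalize-space(./*[local-name() = "{0}"]/text())'
    item = etree.fromstring('<SearchItems><Name>  War and   Peace </Name></SearchItems>')

    # string-valued XPath expressions evaluate to a plain Python string
    print(item.xpath(xp_template.format('Name')))  # -> War and Peace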
Example #19
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.bubok.pt/resellers/calibre_search/' + urllib.quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[contains(@class, "libro")]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//div[@class="url"]/text()'))

                title = ''.join(data.xpath('.//div[@class="titulo"]/text()'))

                author = ''.join(data.xpath('.//div[@class="autor"]/text()'))

                price = ''.join(data.xpath('.//div[@class="precio"]/text()'))

                formats = ''.join(data.xpath('.//div[@class="formatos"]/text()'))

                cover = ''.join(data.xpath('.//div[@class="portada"]/text()'))

                counter -= 1

                s = SearchResult()
                s.title = title.strip()
                s.author = author.strip()
                s.detail_item = id.strip()
                s.price = price.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = formats.strip()
                s.cover_url = cover.strip()
                yield s
Example #20
    def search(self, query, max_results=10, timeout=60):
        url = 'http://ebooks.foyles.co.uk/catalog/search/?query=' + urllib2.quote(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="doc-item"]'):
                if counter <= 0:
                    break
                id_ = ''.join(data.xpath('.//p[@class="doc-cover"]/a/@href')).strip()
                if not id_:
                    continue
                id_ = 'http://ebooks.foyles.co.uk' + id_

                cover_url = ''.join(data.xpath('.//p[@class="doc-cover"]/a/img/@src'))
                title = ''.join(data.xpath('.//span[@class="title"]/a/text()'))
                author = ', '.join(data.xpath('.//span[@class="author"]/span[@class="author"]/text()'))
                price = ''.join(data.xpath('.//span[@itemprop="price"]/text()')).strip()
                format_ = ''.join(data.xpath('.//p[@class="doc-meta-format"]/span[last()]/text()'))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = id_
                s.drm = SearchResult.DRM_LOCKED
                s.formats = format_

                yield s
Example #21
    def search(self, query, max_results=15, timeout=60):
        search_url = (
            self.shop_url + "/webservice/webservice.asmx/SearchWebService?"
            "searchText=%s&searchContext=ebook" % urllib2.quote(query)
        )
        search_urls = [search_url]

        xp_template = 'normalize-space(./*[local-name() = "{0}"]/text())'
        counter = max_results
        br = browser()

        for url in search_urls:
            with closing(br.open(url, timeout=timeout)) as f:
                raw = xml_to_unicode(f.read(), strip_encoding_pats=True, assume_utf8=True)[0]
                doc = etree.fromstring(raw)
                for data in doc.xpath('//*[local-name()="SearchItems" or local-name()="ItemDetail"]'):
                    if counter <= 0:
                        break
                    counter -= 1

                    s = SearchResult()
                    s.detail_item = data.xpath(xp_template.format("ID"))
                    s.title = data.xpath(xp_template.format("Name"))
                    s.author = data.xpath(xp_template.format("Author"))
                    s.price = data.xpath(xp_template.format("Price"))
                    s.cover_url = data.xpath(xp_template.format("Picture"))
                    s.price = format_price_in_RUR(s.price)
                    yield s
Example #22
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.kobobooks.com/search/search.html?q=' + urllib.quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//ul[contains(@class, "flowview-items")]/li'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('./a[contains(@class, "block-link")]/@href'))
                if not id:
                    continue
                id = id[1:]

                price = ''.join(data.xpath('.//a[contains(@class, "primary-button")]//text()'))

                cover_url = ''.join(data.xpath('.//img[1]/@src'))
                cover_url = 'http:%s' % cover_url

                title = ''.join(data.xpath('.//p[contains(@class, "flowview-item-title")]//text()'))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.price = price.strip()
                s.detail_item = 'http://store.kobobooks.com/' + id.strip()
                s.formats = 'EPUB'
                s.drm = SearchResult.DRM_UNKNOWN

                yield s
Example #23
    def search(self, query, max_results=10, timeout=60):
        url = 'http://zixo.pl/wyszukiwarka/?search=' + urllib.quote(query.encode('utf-8')) + '&product_type=0'

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="productInline"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//a[@class="productThumb"]/@href'))
                if not id:
                    continue

                cover_url = ''.join(data.xpath('.//a[@class="productThumb"]/img/@src'))
                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
                author = ','.join(data.xpath('.//div[@class="productDescription"]/span[1]/a/text()'))
                price = ''.join(data.xpath('.//div[@class="priceList"]/span/text()'))
                price = re.sub('\.', ',', price)

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = 'http://zixo.pl' + id.strip()
                s.drm = SearchResult.DRM_LOCKED

                yield s
Example #24
    def search(self, query, max_results=10, timeout=60):
        url = 'http://woblink.com/publication?query=' + urllib.quote_plus(query.encode('utf-8'))
        if max_results > 10:
            if max_results > 20:
                url += '&limit=30'
            else:
                url += '&limit=20'

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="book-item"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//td[@class="w10 va-t"]/a[1]/@href'))
                if not id:
                    continue

                cover_url = ''.join(data.xpath('.//td[@class="w10 va-t"]/a[1]/img/@src'))
                title = ''.join(data.xpath('.//h2[@class="title"]/a[1]/text()'))
                author = ', '.join(data.xpath('.//p[@class="author"]/a/text()'))
                price = ''.join(data.xpath('.//div[@class="prices"]/span[1]/span/text()'))
                price = re.sub('\.', ',', price)
                formats = [ form[8:-4].split('_')[0] for form in data.xpath('.//p[3]/img/@src')]

                s = SearchResult()
                s.cover_url = 'http://woblink.com' + cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price + ' zł'
                s.detail_item = id.strip()
                
                # MOBI should be sent first,
                if 'MOBI' in formats:
                    t = copy.copy(s)
                    t.title += ' MOBI'
                    t.drm = SearchResult.DRM_UNLOCKED
                    t.formats = 'MOBI'
                    formats.remove('MOBI')
                    
                    counter -= 1
                    yield t
                    
                # and the remaining formats (if any) next
                if formats:
                    if 'epub' in formats:
                        formats.remove('epub')
                        formats.append('WOBLINK')
                        if 'E Ink' in data.xpath('.//div[@class="prices"]/img/@title'):
                            formats.insert(0, 'EPUB')
                    
                    s.drm = SearchResult.DRM_LOCKED
                    s.formats = ', '.join(formats).upper()
                    
                    counter -= 1
                    yield s
Example #25
    def search(self, query, max_results=100, timeout=180):
        url = 'http://www.e-knjiga.si/rezultati_cover.php?query=' + urllib2.quote(query)

        print("will search for: " + urllib2.quote(query) + ":\n  " + url)

        br = browser()

        # counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:

            html = etree.HTML(f.read())

            # get list of books
            for book in html.xpath("//table[@class='zebra']"):
                print(etree.tostring(book, pretty_print=True, method="html"))
        
                author = book.find('.//tr/[0]/td/[1]').text
                title = book.find('.//tr/[0]/td/[2]/a').text
                details = 'http://www.e-knjiga.si/' + book.find('.//tr/[0]/td/[2]/a').get("href")

                # get details
                fo = urllib2.urlopen(details)
                det = etree.HTML(fo.read())
                fo.close()
                
                table = det.find(".//div[@id='center_container']").find('./table')
                cover = 'http://www.e-knjiga.si/' + table.find('.//tr/[1]/td/[1]/div/img').get("src")
                description = table.find(".//tr/[6]/td[@class='knjige_spremna']").text

                links = []
                files = table.find('.//tr/[7]/td/[1]')
                for file in files.iter('a'):
                    links.append("http://www.e-knjiga.si/"+file.get("href"))

                
                #print("Author:    " + author)
                #print("Title:     " + title)
                #print("Details:   " + details)
                #print("Description:  " + description)
                #print("Cover:        " + cover)
                #print("Files:       ")
                #print('\n              '.join(links))
                
                s = SearchResult()
                s.title = title
                s.author = author
                s.price = "0.00eur"
                s.drm = SearchResult.DRM_UNLOCKED
                s.detail_item = description
                
                for f in links:
                    ftype = f.split(".")[-1]
                    s.downloads[ftype] = f
                s.formats = ', '.join(s.downloads.keys())
                    
                s.cover_url = cover

                yield s
Example #26
    def search(self, query, max_results=10, timeout=60):
        url = "http://ebookstore.sony.com/search?keyword=%s" % urllib.quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for item in doc.xpath(
                '//div[contains(@class, "searchResult")]/' 'descendant::li[contains(@class, "hreview")]'
            ):
                if counter <= 0:
                    break

                curr = "".join(
                    item.xpath('descendant::div[@class="pricing"]/descendant::*[@class="currency"]/@title')
                ).strip()
                amt = "".join(
                    item.xpath('descendant::div[@class="pricing"]/descendant::*[@class="amount"]/text()')
                ).strip()
                s = SearchResult()
                s.price = (curr + " " + amt) if (curr and amt) else _("Not Available")
                title = item.xpath('descendant::h3[@class="item"]')
                if not title:
                    continue
                title = etree.tostring(title[0], method="text", encoding=unicode)
                if not title:
                    continue
                s.title = title.strip()
                s.author = "".join(
                    item.xpath('descendant::li[contains(@class, "author")]/' 'a[@class="fn"]/text()')
                ).strip()
                if not s.author:
                    continue
                detail_url = "".join(
                    item.xpath('descendant::h3[@class="item"]' '/descendant::a[@class="fn" and @href]/@href')
                )
                if not detail_url:
                    continue
                if detail_url.startswith("/"):
                    detail_url = "http:" + detail_url
                s.detail_item = detail_url

                counter -= 1

                cover_url = "".join(item.xpath('descendant::li[@class="coverart"]/' "descendant::img[@src]/@src"))
                if cover_url:
                    if cover_url.startswith("//"):
                        cover_url = "http:" + cover_url
                    elif cover_url.startswith("/"):
                        cover_url = "http://ebookstore.sony.com" + cover_url
                    s.cover_url = url_slash_cleaner(cover_url)

                s.drm = SearchResult.DRM_UNKNOWN
                s.formats = "Sony"

                yield s
Example #27
def search_kobo(query, max_results=10, timeout=60, write_html_to=None):
    from css_selectors import Select
    url = 'http://www.kobobooks.com/search/search.html?q=' + urllib.quote_plus(query)

    br = browser()

    with closing(br.open(url, timeout=timeout)) as f:
        raw = f.read()
        if write_html_to is not None:
            with open(write_html_to, 'wb') as f:
                f.write(raw)
        doc = html.fromstring(raw)
        select = Select(doc)
        for i, item in enumerate(select('.result-items .item-wrapper.book')):
            if i == max_results:
                break
            for img in select('.item-image img[src]', item):
                cover_url = img.get('src')
                if cover_url.startswith('//'):
                    cover_url = 'http:' + cover_url
                break
            else:
                cover_url = None

            for p in select('p.title', item):
                title = etree.tostring(p, method='text', encoding=unicode).strip()
                for a in select('a[href]', p):
                    url = 'http://store.kobobooks.com' + a.get('href')
                    break
                else:
                    url = None
                break
            else:
                title = None

            authors = []
            for a in select('p.author a.contributor', item):
                authors.append(etree.tostring(a, method='text', encoding=unicode).strip())
            authors = authors_to_string(authors)

            for p in select('p.price', item):
                price = etree.tostring(p, method='text', encoding=unicode).strip()
                break
            else:
                price = None

            if title and authors and url:
                s = SearchResult()
                s.cover_url = cover_url
                s.title = title
                s.author = authors
                s.price = price
                s.detail_item = url
                s.formats = 'EPUB'
                s.drm = SearchResult.DRM_UNKNOWN

                yield s
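
The parser above relies on Python's for/else idiom: each loop breaks on the first match, and the else clause, which runs only when the loop finishes without a break, supplies the fallback value. A minimal standalone illustration of that pattern:

    images = []  # imagine select('.item-image img[src]', item) matched nothing

    for img in images:
        cover_url = img.get('src')
        break
    else:
        # the loop never hit break (here it was empty), so use the fallback
        cover_url = None

    print(cover_url)  # -> None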
Example #28
    def search(self, query, max_results=10, timeout=60):
        url = 'http://woblink.com/katalog-e-book?query=' + urllib.quote_plus(query.encode('utf-8'))
        if max_results > 10:
            if max_results > 20:
                url += '&limit=30'
            else:
                url += '&limit=20'

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="book-item backgroundmix"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//td[@class="w10 va-t mYHaveItYes"]/a[1]/@href'))
                if not id:
                    continue

                cover_url = ''.join(data.xpath('.//td[@class="w10 va-t mYHaveItYes"]/a[1]/img/@src'))
                title = ''.join(data.xpath('.//h2[@class="title"]/a[1]/text()'))
                author = ', '.join(data.xpath('.//td[@class="va-t"]/h3/a/text()'))
                price = ''.join(data.xpath('.//div[@class="prices"]/span[1]/strong/span/text()'))
                price = re.sub('\.', ',', price)
                formats = [ form[8:-4].split('.')[0] for form in data.xpath('.//p[3]/img/@src')]

                s = SearchResult()
                s.cover_url = 'http://woblink.com' + cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price + ' zł'
                s.detail_item = id.strip()

                if 'epub_drm' in formats:
                    s.drm = SearchResult.DRM_LOCKED
                    s.formats = 'EPUB'

                    counter -= 1
                    yield s
                elif 'pdf' in formats:
                    s.drm = SearchResult.DRM_LOCKED
                    s.formats = 'PDF'

                    counter -= 1
                    yield s
                else:
                    s.drm = SearchResult.DRM_UNLOCKED
                    if 'MOBI_nieb' in formats:
                        formats.remove('MOBI_nieb')
                        formats.append('MOBI')
                    s.formats = ', '.join(formats).upper()

                    counter -= 1
                    yield s
Example #29
    def search(self, query, max_results=10, timeout=60):
        '''
        XinXii's open search url is:
        http://www.xinxii.com/catalog-search/query/?keywords={searchTerms}&amp;pw={startPage?}&amp;doc_lang={docLang}&amp;ff={docFormat},{docFormat},{docFormat}

        This URL requires docLang and docFormat. However, the search request
        sent to XinXii does not actually need them, so they can be ignored. We
        cannot push this into the standard OpenSearchOPDSStore search because
        of those required attributes.

        XinXii doesn't return all of the info supported by the
        OpenSearchOPDSStore search function, so this one is modified to remove
        the parts that are not used.
        '''

        url = 'http://www.xinxii.com/catalog-search/query/?keywords=' + urllib.quote_plus(query)

        counter = max_results
        br = browser()
        with closing(br.open(url, timeout=timeout)) as f:
            doc = etree.fromstring(f.read())
            for data in doc.xpath('//*[local-name() = "entry"]'):
                if counter <= 0:
                    break

                counter -= 1

                s = SearchResult()

                s.detail_item = ''.join(data.xpath('./*[local-name() = "id"]/text()')).strip()

                for link in data.xpath('./*[local-name() = "link"]'):
                    rel = link.get('rel')
                    href = link.get('href')
                    type = link.get('type')

                    if rel and href and type:
                        if rel in ('http://opds-spec.org/thumbnail', 'http://opds-spec.org/image/thumbnail'):
                            s.cover_url = href
                        if rel == 'alternate':
                            s.detail_item = href

                s.formats = 'EPUB, PDF'

                s.title = ' '.join(data.xpath('./*[local-name() = "title"]//text()')).strip()
                s.author = ', '.join(data.xpath('./*[local-name() = "author"]//*[local-name() = "name"]//text()')).strip()

                price_e = data.xpath('.//*[local-name() = "price"][1]')
                if price_e:
                    price_e = price_e[0]
                    currency_code = price_e.get('currencycode', '')
                    price = ''.join(price_e.xpath('.//text()')).strip()
                    s.price = currency_code + ' ' + price
                    s.price = s.price.strip()


                yield s
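
For context on the docstring above: in an OpenSearch URL template, {searchTerms} is replaced with the URL-encoded query, and parameters whose names end in '?' (such as {startPage?}) are optional. A rough, illustrative sketch of that substitution (not XinXii's or calibre's actual implementation):

    import re
    import urllib

    def fill_opensearch_template(template, query):
        # substitute the one required parameter
        url = template.replace('{searchTerms}', urllib.quote_plus(query))
        # drop whatever placeholders remain ({startPage?} is optional by spec;
        # per the docstring, XinXii also tolerates missing docLang/docFormat)
        return re.sub(r'\{[^}]*\}', '', url)

    template = ('http://www.xinxii.com/catalog-search/query/'
                '?keywords={searchTerms}&pw={startPage?}&doc_lang={docLang}')
    print(fill_opensearch_template(template, 'science fiction'))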
Example #30
def search_amazon(query, max_results=10, timeout=60,
                  write_html_to=None,
                  base_url=SEARCH_BASE_URL,
                  base_query=SEARCH_BASE_QUERY,
                  field_keywords='k'
                  ):
    uquery = base_query.copy()
    uquery[field_keywords] = query

    def asbytes(x):
        if isinstance(x, type('')):
            x = x.encode('utf-8')
        return x
    uquery = {asbytes(k):asbytes(v) for k, v in uquery.items()}
    url = base_url + '?' + urlencode(uquery)
    br = browser(user_agent=get_user_agent())

    counter = max_results
    with closing(br.open(url, timeout=timeout)) as f:
        raw = f.read()
        if write_html_to is not None:
            with open(write_html_to, 'wb') as f:
                f.write(raw)
        doc = html.fromstring(raw)
        for result in doc.xpath('//div[contains(@class, "s-result-list")]//div[@data-index and @data-asin]'):
            kformat = ''.join(result.xpath('.//a[contains(text(), "Kindle Edition")]//text()'))
            # Even though we are searching digital-text only, Amazon will still
            # put in results for non-Kindle books (author pages). So we need
            # to explicitly check whether the item is a Kindle book and ignore
            # it if it isn't.
            if 'kindle' not in kformat.lower():
                continue
            asin = result.get('data-asin')
            if not asin:
                continue

            cover_url = ''.join(result.xpath('.//img/@src'))
            title = etree.tostring(result.xpath('.//h5')[0], method='text', encoding='unicode')
            adiv = result.xpath('.//div[contains(@class, "a-color-secondary")]')[0]
            aparts = etree.tostring(adiv, method='text', encoding='unicode').split()
            idx = aparts.index('|')
            author = ' '.join(aparts[1:idx])
            price = ''.join(result.xpath('.//span[contains(@class, "a-price")]/span[contains(@class, "a-offscreen")]/text()'))

            counter -= 1

            s = SearchResult()
            s.cover_url = cover_url.strip()
            s.title = title.strip()
            s.author = author.strip()
            s.detail_item = asin.strip()
            s.price = price.strip()
            s.formats = 'Kindle'

            yield s
Example #31
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.barnesandnoble.com/s/%s?keyword=%s&store=ebook&view=list' % (query.replace(' ', '-'), quote_plus(query))

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            raw = f.read()
            doc = html.fromstring(raw)
            for data in doc.xpath('//ol[contains(@class, "result-set")]/li[contains(@class, "result")]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//div[contains(@class, "image-block")]/a/@href'))
                if not id:
                    continue

                cover_url = ''
                cover_id = ''.join(data.xpath('.//img[contains(@class, "product-image")]/@id'))
                m = re.search(r"%s'.*?srcUrl: '(?P<iurl>.*?)'.*?}" % cover_id, raw)
                if m:
                    cover_url = m.group('iurl')

                title = ''.join(data.xpath('descendant::p[@class="title"]//span[@class="name"]//text()')).strip()
                if not title:
                    continue

                author = ', '.join(data.xpath('.//ul[contains(@class, "contributors")]//a[contains(@class, "subtle")]//text()')).strip()
                price = ''.join(data.xpath('.//a[contains(@class, "bn-price")]//text()'))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = id.strip()
                s.drm = SearchResult.DRM_UNKNOWN
                s.formats = 'Nook'

                yield s
Example #32
    def search(self, query, max_results=10, timeout=60):
        url = 'https://www.empik.com/ebooki/ebooki,3501,s?sort=scoreDesc&resultsPP={}&q={}'.format(
            max_results, quote(query))

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath(
                    '//div[@class="search-content js-search-content"]/div'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//div[@class="name"]/a/@href'))
                if not id:
                    continue

                cover_url = ''.join(
                    data.xpath('.//a/img[@class="lazy"]/@lazy-img'))
                author = ', '.join(
                    data.xpath('.//a[@class="smartAuthor"]/text()'))
                title = ''.join(data.xpath('.//div[@class="name"]/a/@title'))
                price = ''.join(
                    data.xpath('.//div[@class="price ta-price-tile "]/text()'))

                # with closing(br.open('https://empik.com' + id.strip(), timeout=timeout/4)) as nf:
                #    idata = html.fromstring(nf.read())
                #    crawled = idata.xpath('.//a[(@class="chosen hrefstyle") or (@class="connectionsLink hrefstyle")]/text()')
                #    formats = ','.join([re.sub('ebook, ','', x.strip()) for x in crawled if 'ebook' in x])

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.split('  - ')[0]
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = 'https://empik.com' + id.strip()
                # s.formats = formats.upper().strip()

                yield s
Example #33
    def search(self, query, max_results=10, timeout=60):
        url = 'http://wolnelektury.pl/szukaj?q=' + urllib.quote_plus(
            query.encode('utf-8'))

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//li[@class="Book-item"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//div[@class="title"]/a/@href'))
                if not id:
                    continue

                cover_url = ''.join(
                    data.xpath('.//div[@class="cover-area"]//img/@src'))
                title = ''.join(
                    data.xpath('.//div[@class="title"]/a[1]/text()'))
                author = ', '.join(
                    data.xpath('.//div[@class="author"]/a/text()'))
                price = '0,00 zł'

                counter -= 1

                s = SearchResult()
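                # collect direct download links for each offered format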
                for link in data.xpath(
                        './/div[@class="book-box-formats"]/span/a'):
                    ext = ''.join(link.xpath('./text()'))
                    href = 'http://wolnelektury.pl' + link.get('href')
                    s.downloads[ext] = href
                s.cover_url = 'http://wolnelektury.pl' + cover_url.strip()
                s.title = title.strip()
                s.author = author
                s.price = price
                s.detail_item = 'http://wolnelektury.pl' + id
                s.formats = ', '.join(s.downloads.keys())
                s.drm = SearchResult.DRM_UNLOCKED

                yield s
Example #34
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.rw2010.pl/go.live.php/?launch_macro=catalogue-search-rd'
        values={
            'fkeyword': query,
            'file_type':''
            }

        br = browser()

        counter = max_results
        with closing(br.open(url, data=urlencode(values), timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="ProductDetail"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//div[@class="img"]/a/@href'))
                if not id:
                    continue

                with closing(br.open(id.strip(), timeout=timeout/4)) as nf:
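                    # the result list only contains a link, so read cover, author, title, formats and price from the detail page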
                    idata = html.fromstring(nf.read())
                    cover_url = ''.join(idata.xpath('//div[@class="boxa"]//div[@class="img"]/img/@src'))
                    author = ''.join(idata.xpath('//div[@class="boxb"]//h3[text()="Autor: "]/span/text()'))
                    title = ''.join(idata.xpath('//div[@class="boxb"]/h2[1]/text()'))
                    title = re.sub(r'\(#.+\)', '', title)
                    formats = ''.join(idata.xpath('//div[@class="boxb"]//h3[text()="Format pliku: "]/span/text()'))
                    price = ''.join(idata.xpath('//div[@class="price-box"]/span/text()')) + ',00 zł'

                counter -= 1

                s = SearchResult()
                s.cover_url = 'http://www.rw2010.pl/' + cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = re.sub(r'%3D', '=', id)
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = formats[0:-2].upper()

                yield s
Example #35
    def search(self, query, max_results=10, timeout=60):
        url = 'http://weightlessbooks.com/?s=' + urllib.quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//li[@class="product"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//div[@class="cover"]/a/@href'))
                if not id:
                    continue

                cover_url = ''.join(data.xpath('.//div[@class="cover"]/a/img/@src'))

                price = ''.join(data.xpath('.//div[@class="buy_buttons"]/b[1]/text()'))
                if not price:
                    continue

                formats = ', '.join(data.xpath('.//select[@class="eStore_variation"]//option//text()'))
                formats = formats.upper()

                title = ''.join(data.xpath('.//h3/a/text()'))
                author = ''.join(data.xpath('.//h3//text()'))
                author = author.replace(title, '')

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = id.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = formats

                yield s
Example #36
    def search(self, query, max_results=10, timeout=60):

        br = browser()
        page = 1

        counter = max_results
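        # walk the paginated results until there is no "next-page" link or enough results have been yielded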
        while counter:
            with closing(br.open('https://cdp.pl/ksiazki/e-book.html?q=' + urllib.parse.quote_plus(query) + '&p=' + str(page), timeout=timeout)) as f:
                doc = html.fromstring(f.read())
                for data in doc.xpath('//ul[@class="products"]/li'):
                    if counter <= 0:
                        break

                    id = ''.join(data.xpath('.//a[@class="product-image"]/@href'))
                    if not id:
                        continue

                    cover_url = ''.join(data.xpath('.//a[@class="product-image"]/img/@data-src'))
                    title = ''.join(data.xpath('.//h3[1]/a/@title'))
                    price = ''.join(data.xpath('.//span[@class="custom_price"]/text()'))+','+''.join(data.xpath('.//span[@class="custom_price"]/sup/text()'))
                    author = ''.join(data.xpath('.//div[@class="authors"]/@title'))
                    formats = ''
                    with closing(br.open(id.strip(), timeout=timeout/4)) as nf:
                        idata = html.fromstring(nf.read())
                        formats = idata.xpath('//div[@class="second-part-holder"]//div[@class="product-attributes-container"]/ul/li/span/text()')[-1]

                    counter -= 1

                    s = SearchResult()
                    s.cover_url = cover_url
                    s.title = title.replace(' (ebook)','').strip()
                    s.author = author
                    s.price = price + ' zł'
                    s.detail_item = id.strip()
                    s.drm = SearchResult.DRM_UNLOCKED
                    s.formats = formats.upper().strip()

                    yield s
                if not doc.xpath('//a[@class="next-page"]'):
                    break
            page += 1
Example #37
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.ebooks.com/SearchApp/SearchResults.net?term=' + urllib.quote_plus(
            query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@id="results"]//li'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//a[1]/@href'))
                mo = re.search(r'\d+', id)
                if not mo:
                    continue
                id = mo.group()

                cover_url = ''.join(
                    data.xpath('.//div[contains(@class, "img")]//img/@src'))

                title = ''.join(
                    data.xpath('descendant::span[@class="book-title"]/a/text()'
                               )).strip()
                author = ', '.join(
                    data.xpath(
                        'descendant::span[@class="author"]/a/text()')).strip()
                if not title or not author:
                    continue

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.detail_item = ('?url=http://www.ebooks.com/cj.asp?IID=' +
                                 id.strip() + '&cjsku=' + id.strip())

                yield s
Example #38
    def search(self, query, max_results=25, timeout=60):
        url = 'http://ebookpoint.pl/search.scgi?szukaj=' + urllib.quote_plus(
            query.decode('utf-8').encode(
                'iso-8859-2')) + '&serwisyall=0&x=0&y=0'

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="book-list"]/ul[2]/li'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//a[@class="cover"]/@href'))
                if not id:
                    continue

                cover_url = ''.join(
                    data.xpath('.//a[@class="cover"]/img/@src'))
                title = ''.join(data.xpath('.//h3/a/@title'))
                title = re.sub('eBook.', '', title)
                author = ''.join(data.xpath('.//p[@class="author"]//text()'))
                price = ''.join(data.xpath('.//p[@class="price"]/ins/text()'))

                formats = ', '.join(
                    data.xpath('.//div[@class="ikony"]/span/text()'))

                counter -= 1

                s = SearchResult()
                s.cover_url = 'http://ebookpoint.pl' + re.sub(
                    '72x9', '65x8', cover_url)
                s.title = title.strip()
                s.author = author.strip()
                s.price = re.sub(r'\.', ',', price)
                s.detail_item = id.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = formats.upper()

                yield s
Example #39
    def search(self, query, max_results=10, timeout=60):
        url = 'https://www.beam-shop.de/search?saltFieldLimitation=all&sSearch=' + quote(
            query)
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[contains(@class, "product--box")]'):
                if counter <= 0:
                    break

                id_ = ''.join(
                    data.xpath(
                        './div/div[contains(@class, "product--info")]/a/@href')
                ).strip()
                if not id_:
                    continue
                cover_url = ''.join(
                    data.xpath(
                        './div/div[contains(@class, "product--info")]/a//img/@srcset'
                    ))
                if cover_url:
                    cover_url = cover_url.split(',')[0].strip()
                author = data.xpath(
                    './/a[@class="product--author"]/text()')[0].strip()
                title = data.xpath(
                    './/a[@class="product--title"]/text()')[0].strip()
                price = data.xpath(
                    './/div[@class="product--price"]/span/text()')[0].strip()
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.drm = SearchResult.DRM_UNLOCKED
                s.detail_item = id_
                #                 s.formats = None
                yield s
Example #40
    def search(self, query, max_results=10, timeout=60):
        try:
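            # search by title and by author, then resolve the combined hits to full records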
            results = lg.lookup(lg.search(query, 'title') + lg.search(query, 'author'))
            print('Reached LibGen Mirrors.')
        except Exception as e:
            print(e)
            print('pylibgen crashed. In most cases this is caused by unreachable LibGen Mirrors, try again in a few minutes.')
            return

        self.num_results = len(results)

        for r in results:
            s = SearchResult()
            s.title = r['title']
            s.author = r['author']
            s.price = '$0.00'
            s.drm = SearchResult.DRM_UNLOCKED
            s.formats = r['extension']
            s.detail_item = r['md5']
            s.cover_url = self.get_cover_url(s.detail_item)
            yield s
Example #41
    def create_search_result(self, data):
        xp_template = 'normalize-space(@{0})'

        sRes = SearchResult()
        sRes.drm = SearchResult.DRM_UNLOCKED
        sRes.detail_item = data.xpath(xp_template.format('hub_id'))
        sRes.title = data.xpath('string(.//title-info/book-title/text()|.//publish-info/book-name/text())')
        # aut = concat('.//title-info/author/first-name', ' ')
        authors = data.xpath('.//title-info/author/first-name/text()|'
                             './/title-info/author/middle-name/text()|'
                             './/title-info/author/last-name/text()')
        sRes.author = u' '.join(map(type(u''), authors))
        sRes.price = data.xpath(xp_template.format('price'))
        # cover vs cover_preview
        sRes.cover_url = data.xpath(xp_template.format('cover_preview'))
        sRes.price = format_price_in_RUR(sRes.price)

        types = data.xpath('//fb2-book//files/file/@type')
        fmt_set = _parse_ebook_formats(' '.join(types))
        sRes.formats = ', '.join(fmt_set)
        return sRes
Example #42
    def search(self, query, max_results=10, timeout=60):
        url = 'https://drmfree.calibre-ebook.com/search/?q=' + quote_plus(query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//ul[@id="object_list"]//li'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//div[@class="links"]/a[1]/@href'))
                id = id.strip()
                if not id:
                    continue

                cover_url = ''.join(data.xpath('.//div[@class="cover"]/img/@src'))

                price = ''.join(data.xpath('.//div[@class="price"]/text()'))
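                # keep only the text that follows the 'Price:' label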
                a, b, price = price.partition('Price:')
                price = price.strip()
                if not price:
                    continue

                title = ''.join(data.xpath('.//div/strong/text()'))
                author = ''.join(data.xpath('.//div[@class="author"]//text()'))
                author = author.partition('by')[-1]

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = id.strip()
                s.drm = SearchResult.DRM_UNLOCKED

                yield s
Example #43
    def search(self, query, max_results=10, timeout=60):
        url = 'https://www.legimi.pl/ebooki/?szukaj=' + urllib.quote_plus(
            query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@id="listBooks"]/div'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//a[1]/@href'))
                if not id:
                    continue

                cover_url = ''.join(
                    data.xpath(
                        './/span[@class="listImage imageDarkLoader"]/img/@src')
                )
                title = ''.join(
                    data.xpath(
                        './/span[@class="bookListTitle ellipsis"]/text()'))
                author = ''.join(
                    data.xpath(
                        './/span[@class="bookListAuthor ellipsis"]/text()'))
                price = ''.join(
                    data.xpath('.//div[@class="bookListPrice"]/span/text()'))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = 'https://www.legimi.pl/' + id.strip()

                yield s
Example #44
    def search(self, query, max_results=10, timeout=60):
        url = (
            'http://www.ebook.nl/store/advanced_search_result.php?keywords=' +
            quote(query))
        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath(
                    '//div[@id="books"]/div[@itemtype="http://schema.org/Book"]'
            ):
                if counter <= 0:
                    break

                id = ''.join(
                    data.xpath('./meta[@itemprop="url"]/@content')).strip()
                if not id:
                    continue
                cover_url = 'http://www.ebook.nl/store/' + ''.join(
                    data.xpath('.//img[@itemprop="image"]/@src'))
                title = ''.join(
                    data.xpath('./span[@itemprop="name"]/a/text()')).strip()
                author = ''.join(
                    data.xpath('./span[@itemprop="author"]/a/text()')).strip()
                if author == '&nbsp':
                    author = ''
                price = ''.join(
                    data.xpath('.//span[@itemprop="price"]//text()'))
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.drm = SearchResult.DRM_UNKNOWN
                s.detail_item = id

                yield s
Example #45
    def search(self, query, max_results=10, timeout=60):
        url = 'http://zixo.pl/wyszukiwarka/?search=' + urllib.quote(
            query.encode('utf-8')) + '&product_type=0'

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="productInline"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//a[@class="productThumb"]/@href'))
                if not id:
                    continue

                cover_url = ''.join(
                    data.xpath('.//a[@class="productThumb"]/img/@src'))
                title = ''.join(data.xpath('.//a[@class="title"]/text()'))
                author = ','.join(
                    data.xpath(
                        './/div[@class="productDescription"]/span[1]/a/text()')
                )
                price = ''.join(
                    data.xpath('.//div[@class="priceList"]/span/text()'))
                price = re.sub(r'\.', ',', price)

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = 'http://zixo.pl' + id.strip()
                s.drm = SearchResult.DRM_LOCKED

                yield s
Example #46
    def _do_search(self, url, max_results, timeout):
        br = browser()
        with closing(br.open(url, timeout=timeout)) as f:
            page = f.read().decode('utf-8')
            doc = html.fromstring(page)

            for data in doc.xpath('//ul[contains(@class,"book_list")]/li'):
                if max_results <= 0:
                    break

                s = SearchResult()
                s.detail_item = ''.join(
                    data.xpath('.//a[@class="th"]/@href')).strip()
                if not s.detail_item:
                    continue

                s.cover_url = ''.join(
                    data.xpath(
                        './/a[@class="th"]/img/@data-original')).strip()
                s.title = ''.join(
                    data.xpath(
                        './/div[@class="item-title"]/a/text()')).strip()
                s.author = ', '.join(
                    data.xpath('.//div[@class="item-author"]/a/text()')).strip(', ')

                price_list = data.xpath('.//div[@class="item-price"]')
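                # pick the e-book price (the entry labelled 'е-книга:') from the listed price variants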
                for price_item in price_list:
                    if price_item.text.startswith('е-книга:'):
                        s.price = ''.join(price_item.xpath('.//span/text()'))
                        break

                s.price = '0.00 лв.' if not s.price and not price_list else s.price
                if not s.price:
                    # no e-book available
                    continue

                max_results -= 1
                yield s
Example #47
def search(query, max_results=15, timeout=60):
    url = 'http://www.ozon.ru/?context=search&text=%s&store=1,0&group=div_book' % quote_plus(query)

    counter = max_results
    br = browser()

    with closing(br.open(url, timeout=timeout)) as f:
        raw = xml_to_unicode(f.read(), strip_encoding_pats=True, assume_utf8=True)[0]
        root = parse_html(raw)
        for tile in root.xpath('//*[@class="bShelfTile inline"]'):
            if counter <= 0:
                break
            counter -= 1

            s = SearchResult(store_name='OZON.ru')
            s.detail_item = shop_url + tile.xpath('descendant::a[@class="eShelfTile_Link"]/@href')[0]
            s.title = tile.xpath('descendant::span[@class="eShelfTile_ItemNameText"]/@title')[0]
            s.author = tile.xpath('descendant::span[@class="eShelfTile_ItemPerson"]/@title')[0]
            s.price = ''.join(tile.xpath('descendant::div[contains(@class, "eShelfTile_Price")]/text()'))
            s.cover_url = 'http:' + tile.xpath('descendant::img/@data-original')[0]
            s.price = format_price_in_RUR(s.price)
            yield s
Example #48
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.ebookshoppe.com/search.php?search_query=' + quote(
            query)
        br = browser()
        br.addheaders = [("Referer", "http://www.ebookshoppe.com/")]

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//ul[@class="ProductList"]/li'):
                if counter <= 0:
                    break

                id = ''.join(
                    data.xpath('./div[@class="ProductDetails"]/'
                               'strong/a/@href')).strip()
                if not id:
                    continue
                cover_url = ''.join(
                    data.xpath('./div[@class="ProductImage"]/a/img/@src'))
                title = ''.join(
                    data.xpath(
                        './div[@class="ProductDetails"]/strong/a/text()'))
                price = ''.join(
                    data.xpath('./div[@class="ProductPriceRating"]/em/text()'))
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.price = price
                s.drm = SearchResult.DRM_UNLOCKED
                s.detail_item = id

                self.get_author_and_formats(s, timeout)
                if not s.author:
                    continue

                yield s
Example #49
    def search(self, query, max_results=10, timeout=60):
        url = u'http://uk.nook.com/s/%s?s%%5Bdref%%5D=1&s%%5Bkeyword%%5D=%s' % (query.replace(' ', '-'), urllib.quote(query))

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            raw = f.read()
            doc = html.fromstring(raw)
            for data in doc.xpath('//ul[contains(@class, "product_list")]/li'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//span[contains(@class, "image")]/a/@href'))
                if not id:
                    continue

                cover_url = ''.join(data.xpath('.//span[contains(@class, "image")]//img/@data-src'))

                title = ''.join(data.xpath('.//div[contains(@class, "title")]//text()')).strip()
                if not title:
                    continue

                author = ', '.join(data.xpath('.//div[contains(@class, "contributor")]//a/text()')).strip()
                price = ''.join(data.xpath('.//div[contains(@class, "action")]//a//text()')).strip()
                price = re.sub(r'[^\d.,£]', '', price)

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price.strip()
                s.detail_item = 'http://uk.nook.com/' + id.strip()
                s.drm = SearchResult.DRM_UNKNOWN
                s.formats = 'Nook'

                yield s
Example #50
    def search(self, query, max_results=10, timeout=60):

        br = browser()
        page = 1

        counter = max_results
        while counter:
            with closing(br.open('http://www.koobe.pl/s,p,' + str(page) + ',szukaj/fraza:' + urllib.quote(query), timeout=timeout)) as f:
                doc = html.fromstring(f.read().decode('utf-8'))
                for data in doc.xpath('//div[@class="seach_result"]/div[@class="result"]'):
                    if counter <= 0:
                        break

                    id = ''.join(data.xpath('.//div[@class="cover"]/a/@href'))
                    if not id:
                        continue

                    cover_url = ''.join(data.xpath('.//div[@class="cover"]/a/img/@src'))
                    price = ''.join(data.xpath('.//span[@class="current_price"]/text()'))
                    title = ''.join(data.xpath('.//h2[@class="title"]/a/text()'))
                    author = ', '.join(data.xpath('.//h3[@class="book_author"]/a/text()'))
                    formats = ', '.join(data.xpath('.//div[@class="formats"]/div/div/@title'))

                    counter -= 1

                    s = SearchResult()
                    s.cover_url = 'http://koobe.pl/' + cover_url
                    s.title = title.strip()
                    s.author = author.strip()
                    s.price = price
                    s.detail_item = 'http://koobe.pl' + id[1:]
                    s.formats = formats.upper()
                    s.drm = SearchResult.DRM_UNLOCKED

                    yield s
                if not doc.xpath('//div[@class="site_bottom"]//a[@class="right"]'):
                    break
            page += 1
Example #51
    def search(self, query, max_results=10, timeout=60):

        br = browser()
        page = 1

        counter = max_results
        while counter:
            with closing(br.open('http://ebooki.allegro.pl/szukaj?fraza=' + urllib.quote(query) + '&strona=' + str(page), timeout=timeout)) as f:
                doc = html.fromstring(f.read().decode('utf-8'))
                for data in doc.xpath('//div[@class="listing-list"]/div[@class="listing-list-item"]'):
                    if counter <= 0:
                        break

                    id = ''.join(data.xpath('.//div[@class="listing-cover-wrapper"]/a/@href'))
                    if not id:
                        continue

                    cover_url = ''.join(data.xpath('.//div[@class="listing-cover-wrapper"]/a/img/@src'))
                    title = ''.join(data.xpath('.//div[@class="listing-info"]/div[1]/a/text()'))
                    author = ', '.join(data.xpath('.//div[@class="listing-info"]/div[2]/a/text()'))
                    price = ''.join(data.xpath('.//div[@class="book-price"]/text()'))
                    formats = ', '.join(data.xpath('.//div[@class="listing-buy-formats"]//div[@class="devices-wrapper"]/span[@class="device-label"]/span/text()'))

                    counter -= 1

                    s = SearchResult()
                    s.cover_url = 'http://ebooki.allegro.pl/' + cover_url
                    s.title = title.strip()
                    s.author = author.strip()
                    s.price = price
                    s.detail_item = 'http://ebooki.allegro.pl/' + id[1:]
                    s.formats = formats.upper()
                    s.drm = SearchResult.DRM_UNLOCKED

                    yield s
                if not doc.xpath('//a[@class="paging-arrow right-paging-arrow"]'):
                    break
            page += 1
Example #52
def search(query, max_results=10, timeout=60):
    libgen_results = libgen.search(query)
    for result in libgen_results.results[:min(max_results,
                                              len(libgen_results.results))]:
        s = SearchResult()

        s.title = result.title
        s.author = result.author
        s.series = result.series
        s.language = result.language

        for download in result.downloads:
            s.downloads[download.format] = download.url

        s.formats = ', '.join(s.downloads.keys())
        s.drm = SearchResult.DRM_UNLOCKED
        s.cover_url = result.image_url

        # don't show results with no downloads
        if not s.formats:
            continue

        yield s
Example #53
    def search(self, query, max_results=10, timeout=60):

        br = browser()
        page = 1

        counter = max_results
        while counter:
            with closing(br.open('https://www.swiatebookow.pl/ebooki/?q=' + quote(query) + '&page=' + str(page), timeout=timeout)) as f:
                doc = html.fromstring(f.read().decode('utf-8'))
                for data in doc.xpath('//div[@class="category-item-container"]//div[@class="book-large"]'):
                    if counter <= 0:
                        break

                    id = ''.join(data.xpath('./a/@href'))
                    if not id:
                        continue

                    cover_url = ''.join(data.xpath('.//div[@class="cover-xs"]/img/@src'))
                    price = ''.join(data.xpath('.//span[@class="item-price"]/text()')+data.xpath('.//span[@class="sub-price"]/text()'))
                    title = ''.join(data.xpath('.//h3/text()'))
                    author = ', '.join(data.xpath('.//div[@class="details"]/p/a/text()'))

                    counter -= 1

                    s = SearchResult()
                    s.cover_url = 'https://www.swiatebookow.pl' + cover_url
                    s.title = title.strip()
                    s.author = author.strip()
                    s.price = price
                    s.detail_item = 'https://www.swiatebookow.pl' + id
                    # s.formats = formats.upper()
                    s.drm = SearchResult.DRM_UNLOCKED

                    yield s
                if not doc.xpath('//div[@class="paging_bootstrap pagination"]//a[@class="next"]'):
                    break
            page += 1
Example #54
    def search(self, query, max_results=10, timeout=60):
        '''
        Searches LibGen for books. Since the mirror links are not direct
        downloads, they are not exposed via `s.downloads`.
        '''

        debug_print('Libgen Fiction::__init__.py:LibgenStore:search:query =',
                    query)

        libgen_results = self.libgen.search(query)

        for result in libgen_results.results[:min(max_results, len(libgen_results.results))]:
            debug_print('Libgen Fiction::__init__.py:LibgenStore:search:'
                        'result.title =',
                        result.title)

            for mirror in result.mirrors[0:1]:  # Calibre only shows 1 anyway
                debug_print('Libgen Fiction::__init__.py:LibgenStore:search:'
                            'result.mirror.url =', mirror.url)

                s = SearchResult()

                s.store_name = PLUGIN_NAME
                s.cover_url = result.image_url
                s.title = '{} ({}, {}{})'.format(
                    result.title, result.language, mirror.size, mirror.unit)
                s.author = result.authors
                s.price = '0.00'
                s.detail_item = result.md5
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = mirror.format
                s.plugin_author = PLUGIN_AUTHORS

                debug_print('Libgen Fiction::__init__.py:LibgenStore:search:s =',
                            s)

                yield s
Example #55
    def search(self, query, max_results=10, timeout=60):
        url = (
            'https://www.whsmith.co.uk/search?keywordCategoryId=wc_dept_ebooks&results=60'
            '&page=1&keywords=' + quote(query))

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//li[@class="product"]'):
                if counter <= 0:
                    break
                id_ = ''.join(
                    data.xpath('./a[@class="product_image_wrap"]/@href'))
                if not id_:
                    continue
                id_ = 'https://www.whsmith.co.uk' + id_
                cover_url = ''.join(
                    data.xpath('.//img[@class="product_image"]/@src'))
                title = ''.join(
                    data.xpath('.//h4[@class="product_title"]/text()'))
                author = ', '.join(
                    data.xpath('.//span[@class="product_second"]/text()'))
                price = ''.join(data.xpath('.//span[@class="price"]/text()'))
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.drm = SearchResult.DRM_LOCKED
                s.detail_item = id_
                s.formats = 'ePub'

                yield s
Example #56
    def search(self, query, max_results=10, timeout=60):
        url = 'https://www.legimi.pl/ebooki/?sort=score&searchphrase=' + quote_plus(
            query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath(
                    '//div[@class="book-search row auto-clear"]/div'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//div[@class="panel-body"]/a/@href'))
                if not id:
                    continue

                cover_url = ''.join(
                    data.xpath('.//div[@class="img-content"]/img/@data-src'))
                title = ''.join(
                    data.xpath(
                        './/a[@class="book-title clampBookTitle"]/text()'))
                author = ' '.join(
                    data.xpath(
                        './/div[@class="authors-container clampBookAuthors"]/a/text()'
                    ))
                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.detail_item = 'https://www.legimi.pl' + id.strip()
                s.drm = SearchResult.DRM_UNLOCKED

                yield s
Example #57
    def search(self, query, max_results=10, timeout=60):
        url = 'http://www.bubok.pt/resellers/calibre_search/' + urllib.quote_plus(
            query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[contains(@class, "libro")]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//div[@class="url"]/text()'))

                title = ''.join(data.xpath('.//div[@class="titulo"]/text()'))

                author = ''.join(data.xpath('.//div[@class="autor"]/text()'))

                price = ''.join(data.xpath('.//div[@class="precio"]/text()'))

                formats = ''.join(
                    data.xpath('.//div[@class="formatos"]/text()'))

                cover = ''.join(data.xpath('.//div[@class="portada"]/text()'))

                counter -= 1

                s = SearchResult()
                s.title = title.strip()
                s.author = author.strip()
                s.detail_item = id.strip()
                s.price = price.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = formats.strip()
                s.cover_url = cover.strip()
                yield s
Example #58
    def search(self, query, max_results=20, timeout=60):
        url = 'http://www.escapemagazine.pl/wyszukiwarka?query=' + urllib.quote_plus(
            query)

        br = browser()

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="item item_short"]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//h2[@class="title"]/a[1]/@href'))
                if not id:
                    continue

                title = ''.join(
                    data.xpath('.//h2[@class="title"]/a[1]/text()'))
                author = ''.join(data.xpath('.//div[@class="author"]/text()'))
                price = ''.join(
                    data.xpath(
                        './/span[@class="price_now"]/strong/text()')) + ' zł'
                cover_url = ''.join(data.xpath('.//img[@class="cover"]/@src'))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = price
                s.detail_item = 'http://www.escapemagazine.pl' + id.strip()
                s.drm = SearchResult.DRM_UNLOCKED
                s.formats = 'PDF'

                yield s
Example #59
    def search(self, query, max_results=20, timeout=60):

        br = browser()

        counter = max_results
        page = 1
        while counter:
            with closing(br.open('http://www.publio.pl/e-booki,strona' + str(page) + '.html?q=' + quote(query), timeout=timeout)) as f:  # noqa
                doc = html.fromstring(f.read())
                for data in doc.xpath('//div[@class="products-list"]//div[@class="product-tile"]'):
                    if counter <= 0:
                        break

                    id = ''.join(data.xpath('.//a[@class="product-tile-cover"]/@href'))
                    if not id:
                        continue

                    cover_url = ''.join(data.xpath('.//img[@class="product-tile-cover-photo"]/@src'))
                    title = ''.join(data.xpath('.//span[@class="product-tile-title-long"]/text()'))
                    author = ', '.join(data.xpath('.//span[@class="product-tile-author"]/a/text()'))
                    price = ''.join(data.xpath('.//div[@class="product-tile-price-wrapper "]/a/ins/text()'))
                    formats = ''.join(data.xpath('.//a[@class="product-tile-cover"]/img/@alt')).split(' - ebook ')[1]

                    counter -= 1

                    s = SearchResult()
                    s.cover_url = 'http://www.publio.pl' + cover_url
                    s.title = title.strip()
                    s.author = author
                    s.price = price
                    s.detail_item = 'http://www.publio.pl' + id.strip()
                    s.formats = formats.upper().strip()

                    yield s
                if not doc.xpath('boolean(//a[@class="next"])'):
                    break
                page += 1
Example #60
    def search(self, query, max_results=12, timeout=60):
        url = 'http://virtualo.pl/?q=' + urllib.quote(query)

        br = browser()
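        # 'Watermark' or 'Brak' (Polish for 'none') in the protection field indicates a DRM-free title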
        no_drm_pattern = re.compile(r'Watermark|Brak')

        counter = max_results
        with closing(br.open(url, timeout=timeout)) as f:
            doc = html.fromstring(f.read())
            for data in doc.xpath('//div[@class="products-list-wrapper"]//li[@class="product "]'):
                if counter <= 0:
                    break

                id = ''.join(data.xpath('.//div[@class="cover-wrapper"]//a/@href')).split(r'?q=')[0]
                if not id:
                    continue

                price = ''.join(data.xpath('.//div[@class="information"]//div[@class="price"]/text()'))
                cover_url = ''.join(data.xpath('.//img[@class="cover"]/@src'))
                title = ''.join(data.xpath('.//div[@class="title"]/a//text()'))
                author = ', '.join(data.xpath('.//div[@class="information"]//div[@class="authors"]/a//text()'))
                formats = [form.strip() for form in data.xpath('.//div[@class="information"]//div[@class="format"]/a//text()')]
                nodrm = no_drm_pattern.search(''.join(data.xpath('.//div[@class="protection"]/text()')))

                counter -= 1

                s = SearchResult()
                s.cover_url = cover_url
                s.title = title.strip()
                s.author = author.strip()
                s.price = re.sub(r'\.', ',', price.strip())
                s.detail_item = 'http://virtualo.pl' + id
                s.formats = ', '.join(formats).upper()
                s.drm = SearchResult.DRM_UNLOCKED if nodrm else SearchResult.DRM_LOCKED

                yield s