def build_q_list(param):
    """
    Translate request parameters into a list of Solr query clauses.

    :param dict param: request parameters. ``q`` is used when it is present and
        non-blank; otherwise ``author``, ``isbn`` and the fielded keys below
        are used.
    :rtype: tuple
    :return: ``(clauses, use_dismax)``. ``use_dismax`` is True only when ``q``
        is treated as plain free text.
    """
    # Solr 4+ has support for regexes (eg `key:/foo.*/`)! But for now, let's not
    # expose that and escape all '/'. Otherwise `key:/works/OL1W` is interpreted as
    # a regex.
    raw_q = param['q'].strip().replace('/', '\\/') if 'q' in param else None

    if raw_q:
        if raw_q == '*:*':
            return ([raw_q], False)
        if 'NOT ' in raw_q:
            # this is a hack: queries containing NOT are passed through as-is
            return ([raw_q.strip()], False)
        if re_fields.search(raw_q):
            fielded = [
                part['op'] if 'op' in part
                else '{}:({})'.format(part['field'], part['value'])
                for part in parse_query_fields(raw_q)
            ]
            return (fielded, False)
        isbn = normalize_isbn(raw_q)
        if isbn and len(isbn) in (10, 13):
            return (['isbn:(%s)' % isbn], False)
        # Plain free text: escape ':' and let the caller use dismax.
        return ([raw_q.strip().replace(':', r'\:')], True)

    clauses = []
    if 'author' in param:
        author = param['author'].strip()
        key_match = re_author_key.search(author)
        if key_match:
            clauses.append("author_key:(%s)" % key_match.group(1))
        else:
            author = re_to_esc.sub(r'\\\g<0>', author)
            # Somehow the value can be empty at this point,
            # passing the following with empty strings causes a severe error in SOLR
            if author:
                clauses.append(
                    "(author_name:({name}) OR author_alternative_name:({name}))"
                    .format(name=author))

    fielded_params = (
        'title',
        'publisher',
        'oclc',
        'lccn',
        'contributor',
        'subject',
        'place',
        'person',
        'time',
    )
    for field in fielded_params:
        if field in param:
            clauses.append(
                '{}:({})'.format(field, re_to_esc.sub(r'\\\g<0>', param[field])))

    if param.get('isbn'):
        clauses.append(
            'isbn:(%s)' % (normalize_isbn(param['isbn']) or param['isbn']))
    return (clauses, False)
def normalize_record_isbns(rec):
    """
    Returns the Edition import record with all ISBN fields cleaned.

    Each ISBN is normalized exactly once; values for which ``normalize_isbn``
    returns something falsy are dropped from the list. The record is modified
    in place and also returned.

    :param dict rec: Edition import record
    :rtype: dict
    :return: A record with cleaned ISBNs in the various possible ISBN locations.
    """
    for field in ('isbn_13', 'isbn_10', 'isbn'):
        raw_isbns = rec.get(field)
        if not raw_isbns:
            # Missing or empty field: leave it exactly as it is.
            continue
        normalized = (normalize_isbn(isbn) for isbn in raw_isbns)
        rec[field] = [isbn for isbn in normalized if isbn]
    return rec
def build_q_list(param):
    """
    Translate request parameters into a list of Solr query clauses.

    :param dict param: request parameters. ``q`` is used when it is present and
        non-blank; otherwise ``author``, ``isbn`` and the fielded keys in
        ``check_params`` are used.
    :rtype: tuple
    :return: ``(q_list, use_dismax)``. ``use_dismax`` is True only when ``q``
        is treated as plain free text.
    """
    q_list = []
    q_param = param['q'].strip() if 'q' in param else None
    use_dismax = False

    if q_param:
        if q_param == '*:*':
            q_list.append(q_param)
        elif 'NOT ' in q_param:  # this is a hack
            q_list.append(q_param.strip())
        elif re_fields.search(q_param):
            q_list.extend(
                i['op'] if 'op' in i else '%s:(%s)' % (i['field'], i['value'])
                for i in parse_query_fields(q_param))
        else:
            isbn = normalize_isbn(q_param)
            if isbn and len(isbn) in (10, 13):
                q_list.append('isbn:(%s)' % isbn)
            else:
                # Plain free text: escape ':' and let the caller use dismax.
                q_list.append(q_param.strip().replace(':', r'\:'))
                use_dismax = True
    else:
        if 'author' in param:
            v = param['author'].strip()
            m = re_author_key.search(v)
            if m:
                q_list.append("author_key:(%s)" % m.group(1))
            else:
                v = re_to_esc.sub(r'\\\g<0>', v)
                # Somehow v can be empty at this point,
                # passing the following with empty strings causes a severe error in SOLR
                if v:
                    q_list.append(
                        "(author_name:(%(name)s) OR author_alternative_name:(%(name)s))"
                        % {'name': v})

        # 'contributor' was previously misspelled 'contribtor', so that
        # request parameter never produced a query clause.
        check_params = [
            'title',
            'publisher',
            'oclc',
            'lccn',
            'contributor',
            'subject',
            'place',
            'person',
            'time',
        ]
        q_list += [
            '%s:(%s)' % (k, re_to_esc.sub(r'\\\g<0>', param[k]))
            for k in check_params if k in param
        ]
        if param.get('isbn'):
            q_list.append(
                'isbn:(%s)' % (normalize_isbn(param['isbn']) or param['isbn']))
    return (q_list, use_dismax)
def get_amazon_metadata(isbn):
    """
    Best-effort lookup of cached Amazon metadata for an ISBN.

    :param isbn: ISBN in any form accepted by ``normalize_isbn``.
    :return: Whatever ``cached_get_amazon_metadata`` returns, or None when the
        ISBN does not normalize to a truthy value or any exception occurs.
    """
    try:
        clean_isbn = normalize_isbn(isbn)
        return cached_get_amazon_metadata(clean_isbn) if clean_isbn else None
    except Exception:
        # Deliberately swallow everything: callers only ever see None on failure.
        return None
def get_betterworldbooks_metadata(isbn):
    """
    Look up Better World Books metadata for an ISBN.

    :param isbn: ISBN in any form accepted by ``normalize_isbn``.
    :return: The result of ``_get_betterworldbooks_metadata``; None when the
        ISBN does not normalize to a truthy value; ``{}`` when the lookup
        raises.
    """
    clean_isbn = normalize_isbn(isbn)
    if not clean_isbn:
        return None
    try:
        return _get_betterworldbooks_metadata(clean_isbn)
    except Exception:
        # Lookup failures are reported as an empty dict rather than raised.
        return {}
def GET(self, isbn):
    """
    Return JSON price/metadata for an ISBN from Amazon and Better World Books.

    When an Open Library edition matches the ISBN (or is created from the
    Amazon metadata), its ``key`` and, if set, ``ocaid`` are added to the
    response.

    :param str isbn: ISBN taken from the request.
    :rtype: str
    :return: JSON-encoded dict with ``amazon`` and ``betterworldbooks`` entries
        (plus optional ``key`` / ``ocaid``), or an ``error`` entry when the
        ISBN cannot be normalized.
    """
    isbn = normalize_isbn(isbn)
    if not isbn:
        # normalize_isbn gives a falsy result for unusable input; stop before
        # len() is applied to it.
        return simplejson.dumps({'error': 'invalid isbn'})

    isbn_type = 'isbn_' + ('13' if len(isbn) == 13 else '10')
    metadata = {
        'amazon': get_amazon_metadata(isbn) or {},
        'betterworldbooks': get_betterworldbooks_metadata(isbn) or {},
    }

    # if bwb fails and isbn10, try again with isbn13
    if len(isbn) == 10 and metadata['betterworldbooks'].get('price') is None:
        isbn_13 = isbn_10_to_isbn_13(isbn)
        metadata['betterworldbooks'] = (
            get_betterworldbooks_metadata(isbn_13) or {})

    # fetch book by isbn if it exists
    book = web.ctx.site.things({
        'type': '/type/edition',
        isbn_type: isbn,
    })

    # if no OL edition for isbn, attempt to create
    # NOTE(review): the result of load() is indexed with [0] below, like the
    # things() result -- confirm load() returns a sequence of keys.
    if (not book) and metadata.get('amazon'):
        book = load(clean_amazon_metadata_for_load(metadata.get('amazon')))

    # include ol edition metadata in response, if available
    if book:
        ed = web.ctx.site.get(book[0])
        if ed:
            metadata['key'] = ed.key
            # The default keeps a missing attribute from raising AttributeError.
            if getattr(ed, 'ocaid', None):
                metadata['ocaid'] = ed.ocaid

    return simplejson.dumps(metadata)
def get_amazon_metadata(isbn):
    """
    Fetch cached Amazon metadata for an ISBN without ever raising.

    :param isbn: ISBN in any form accepted by ``normalize_isbn``.
    :return: The value from ``cached_get_amazon_metadata``, or None when the
        ISBN does not normalize to a truthy value or an exception is raised.
    """
    metadata = None
    try:
        normalized = normalize_isbn(isbn)
        if normalized:
            metadata = cached_get_amazon_metadata(normalized)
    except Exception:
        # Any failure (normalization or lookup) is reported as None.
        metadata = None
    return metadata
def _get_amazon_metadata(id_, id_type='isbn'):
    """Uses the Amazon Product Advertising API ItemLookup operation to locate
    a specific book by identifier; either 'isbn' or 'asin'.
    https://docs.aws.amazon.com/AWSECommerceService/latest/DG/ItemLookup.html

    :param str id_: The item id: isbn (10/13), or Amazon ASIN.
    :param str id_type: 'isbn' or 'asin'.
    :return: A single book item's metadata, or None.
    :rtype: dict or None
    :raises Exception: when ``lending.amazon_api`` is not configured.
    """
    lookup_args = {}
    if id_type == 'isbn':
        id_ = normalize_isbn(id_)
        lookup_args.update(SearchIndex='Books', IdType='ISBN')
    lookup_args.update(
        ItemId=id_,
        # Only affects Offers Response Group, does Amazon sell this directly?
        MerchantId='Amazon',
    )

    if not lending.amazon_api:
        raise Exception(
            "Open Library is not configured to access Amazon's API")

    try:
        found = lending.amazon_api.lookup(**lookup_args)
    except Exception:
        # A failed lookup is reported as "no metadata".
        return None

    # when more than 1 product returned, choose first
    product = found[0] if isinstance(found, list) else found
    return _serialize_amazon_product(product)
def parse_query_fields(q):
    """
    Split a fielded query string into its components.

    Yields, in order of appearance:

    * ``{'field': 'text', 'value': ...}`` for any non-blank text before the
      first field match (or the whole query when nothing matches),
    * ``{'field': name, 'value': ...}`` for each ``re_fields`` match, and
    * ``{'op': ...}`` after a field whose value has an operator found by
      ``re_op``.

    ``:`` inside values is escaped as ``\\:``.

    :param str q: the raw query string
    """
    spans = [match.span() for match in re_fields.finditer(q)]

    leading = (q[:spans[0][0]] if spans else q).strip()
    if leading:
        yield {'field': 'text', 'value': leading.replace(':', r'\:')}

    # A field's value runs up to the start of the next field; the last one
    # runs to the end of the query (slice end of None).
    value_ends = [start for start, _ in spans[1:]] + [None]

    for (start, end), value_end in zip(spans, value_ends):
        # The matched span includes one trailing character that is not part of
        # the field name (dropped via end - 1).
        name = q[start:end - 1].lower()
        if name in FIELD_NAME_MAP:
            name = FIELD_NAME_MAP[name]

        value = q[end:value_end].strip()

        operator = None
        op_match = re_op.search(value)
        if op_match:
            # Drop as many characters from the end as the operator match is long.
            value = value[:-len(op_match.group(0))]
            operator = op_match.group(1)

        if name == 'isbn':
            # Keep the raw value when it does not normalize.
            value = normalize_isbn(value) or value

        yield {'field': name, 'value': value.replace(':', r'\:')}
        if operator:
            yield {'op': operator}
def _get_amazon_metadata(id_, id_type='isbn', resources=None):
    """Fetches metadata for a single book from the affiliate server, by
    identifier; either 'isbn' or 'asin'.
    https://docs.aws.amazon.com/AWSECommerceService/latest/DG/ItemLookup.html

    :param str id_: The item id: isbn (10/13), or Amazon ASIN.
    :param str id_type: 'isbn' or 'asin'.
    :param resources: accepted but not used by this implementation.
    :return: A single book item's metadata, or None.
    :rtype: dict or None
    """
    if not affiliate_server_url:
        return None

    if id_type == 'isbn':
        id_ = normalize_isbn(id_)
        # 978-prefixed ISBN 13s are converted to their ISBN 10 form.
        has_isbn_10_form = len(id_) == 13 and id_.startswith('978')
        if has_isbn_10_form:
            id_ = isbn_13_to_isbn_10(id_)

    endpoint = 'http://%s/isbn/%s' % (affiliate_server_url, id_)
    try:
        response = requests.get(endpoint)
        response.raise_for_status()
        hit = response.json().get('hit')
    except requests.exceptions.ConnectionError:
        logger.exception("Affiliate Server unreachable")
        return None
    except requests.exceptions.HTTPError:
        logger.exception("Affiliate Server: id {} not found".format(id_))
        return None
    # A falsy 'hit' is reported as None.
    return hit or None
def get_betterworldbooks_metadata(isbn):
    """
    Fetch Better World Books metadata for an ISBN.

    :param isbn: ISBN in any form accepted by ``normalize_isbn``.
    :return: The value from ``_get_betterworldbooks_metadata``; ``{}`` if that
        call raises; None if the ISBN does not normalize to a truthy value.
    """
    normalized = normalize_isbn(isbn)
    metadata = None
    if normalized:
        try:
            metadata = _get_betterworldbooks_metadata(normalized)
        except Exception:
            # Report a failed lookup as an empty dict instead of raising.
            metadata = {}
    return metadata
def isbn_redirect(self, isbn_param):
    """
    Redirect to the matching edition when the parameter is a known ISBN.

    Returns None (no redirect) when the parameter does not normalize to an
    ISBN or no edition is found for it.

    :param str isbn_param: possible ISBN taken from the request
    :raises web.seeother: redirect to the edition's key when an edition exists
    """
    isbn = normalize_isbn(isbn_param)
    if not isbn:
        return

    ed = Edition.from_isbn(isbn)
    if ed:
        # The redirect must be raised to stop request handling; merely
        # constructing it lets the handler carry on.
        raise web.seeother(ed.key)
def isbn_redirect(self, isbn_param):
    """
    Redirect to the first edition found for the given ISBN.

    Editions are searched under both ``isbn_10`` and ``isbn_13``, using
    ``opposite_isbn`` to obtain the form that was not supplied. Returns None
    when the parameter does not normalize or nothing matches.

    :param str isbn_param: possible ISBN taken from the request
    :raises web.seeother: redirect to the first matching edition
    """
    isbn = normalize_isbn(isbn_param)
    if not isbn:
        return

    matches = []
    for length in (10, 13):
        if len(isbn) == length:
            candidate = isbn
        else:
            candidate = opposite_isbn(isbn)
        query = {'type': '/type/edition', 'isbn_%d' % length: candidate}
        matches.extend(web.ctx.site.things(query))

    if matches:
        raise web.seeother(matches[0])
def get_betterworldbooks_metadata(isbn):
    """
    Fetch Better World Books metadata for an ISBN, falling back to the result
    of ``betterworldbooks_fmt`` when the lookup raises.

    :param str isbn: Unnormalised ISBN10 or ISBN13
    :return: Metadata for a single BWB book, currently listed on their catalog, or error dict.
    :rtype: dict
    """
    normalized = normalize_isbn(isbn)
    try:
        metadata = _get_betterworldbooks_metadata(normalized)
    except Exception:
        # Any lookup failure is turned into the formatter's fallback value.
        metadata = betterworldbooks_fmt(normalized)
    return metadata
def _get_amazon_metadata(isbn=None):
    """
    Look up a book on Amazon by ISBN and return its metadata as a dict.

    :param str isbn: ISBN in any form accepted by ``normalize_isbn``.
    :rtype: dict or None
    :return: None when the ISBN cannot be normalized, the Amazon API is not
        configured, or the lookup fails. Otherwise a dict with url, price
        details, title, authors, source record, page count, languages and
        cover, plus ``publish_date``, ``publishers``, ``isbn_10`` and
        ``isbn_13`` where available.
    """
    # XXX @hornc, you should be extending this to work with
    # isbn=, asin=, title=, authors=, etc
    isbn = normalize_isbn(isbn)
    if not isbn:
        # Nothing to look up; also protects the len(isbn) checks below.
        return None

    try:
        if not lending.amazon_api:
            raise Exception
        product = lending.amazon_api.lookup(
            ItemId=isbn, IdType="ISBN", SearchIndex="Books")
        # a lookup may return several products; use the first one
        if isinstance(product, list):
            product = product[0]
    except Exception:
        return None

    price_fmt, price, qlt = (None, None, None)
    used = product._safe_get_element_text('OfferSummary.LowestUsedPrice.Amount')
    new = product._safe_get_element_text('OfferSummary.LowestNewPrice.Amount')

    # prioritize lower prices and newer, all things being equal
    if used and new:
        price, qlt = (used, 'used') if int(used) < int(new) else (new, 'new')
    # accept whichever is available
    elif used or new:
        price, qlt = (used, 'used') if used else (new, 'new')

    if price:
        # The amount is divided by 100 before being shown with two decimals.
        price = '{:00,.2f}'.format(int(price) / 100.)
    if qlt:
        price_fmt = "$%s (%s)" % (price, qlt)

    data = {
        'url': "https://www.amazon.com/dp/%s/?tag=%s" % (
            isbn, h.affiliate_id('amazon')),
        'price': price_fmt,
        'price_amt': price,
        'qlt': qlt,
        'title': product.title,
        'authors': [{'name': name} for name in product.authors],
        'source_records': ['amazon:%s' % product.asin],
        'number_of_pages': product.pages,
        'languages': list(product.languages),  # needs to be normalized
        'cover': product.large_image_url,
    }
    if product.publication_date:
        # Only format the date when Amazon supplied one; calling strftime on
        # a missing date would raise.
        data['publish_date'] = product.publication_date.strftime('%b %d, %Y')
    if product.publisher:
        data['publishers'] = [product.publisher]
    if len(isbn) == 10:
        data['isbn_10'] = [isbn]
        data['isbn_13'] = [isbn_10_to_isbn_13(isbn)]
    elif len(isbn) == 13:
        data['isbn_13'] = [isbn]
        # Only 978-prefixed ISBN 13s are converted to an ISBN 10.
        if isbn.startswith('978'):
            data['isbn_10'] = [isbn_13_to_isbn_10(isbn)]
    return data
def GET(self):
    """
    Return JSON price/metadata for the ``isbn`` or ``asin`` request parameter.

    Amazon is queried for either identifier; Better World Books only for
    ISBNs. When an Open Library edition matches (or is created from the Amazon
    metadata), its ``key`` and, if set, ``ocaid`` are added to the response.

    :rtype: str
    :return: JSON-encoded dict with ``amazon`` and ``betterworldbooks`` entries
        (plus optional ``key`` / ``ocaid``), or an ``error`` entry when no
        usable identifier was supplied.
    """
    # @hornc, add: title='', asin='', authors=''
    i = web.input(isbn='', asin='')

    if not (i.isbn or i.asin):
        return simplejson.dumps({'error': 'isbn or asin required'})

    id_ = i.asin if i.asin else normalize_isbn(i.isbn)
    if not id_:
        # The supplied isbn did not normalize; stop before len() is applied
        # to a falsy value.
        return simplejson.dumps({'error': 'isbn or asin required'})

    id_type = 'asin' if i.asin else 'isbn_' + (
        '13' if len(id_) == 13 else '10')
    is_isbn = id_type.startswith('isbn_')

    metadata = {
        'amazon': get_amazon_metadata(id_) or {},
        'betterworldbooks': (
            (get_betterworldbooks_metadata(id_) or {}) if is_isbn else {}),
    }
    # if isbn_13 fails for amazon, we may want to check isbn_10 also
    # xxx

    # if bwb fails and isbn10, try again with isbn13
    if id_type == 'isbn_10' and \
            metadata['betterworldbooks'].get('price') is None:
        isbn_13 = isbn_10_to_isbn_13(id_)
        metadata['betterworldbooks'] = (
            (get_betterworldbooks_metadata(isbn_13) or {}) if isbn_13 else {})

    # fetch book by isbn if it exists
    # if asin... for now, it will fail (which is fine)
    matches = web.ctx.site.things({
        'type': '/type/edition',
        id_type: id_,
    })

    book_key = matches[0] if matches else None

    # if no OL edition for isbn, attempt to create
    if (not book_key) and metadata.get('amazon'):
        resp = load(clean_amazon_metadata_for_load(metadata.get('amazon')))
        if resp and 'edition' in resp:
            book_key = resp.get('edition').get('key')

    # include ol edition metadata in response, if available
    if book_key:
        ed = web.ctx.site.get(book_key)
        if ed:
            metadata['key'] = ed.key
            # The default keeps a missing attribute from raising AttributeError.
            if getattr(ed, 'ocaid', None):
                metadata['ocaid'] = ed.ocaid

    return simplejson.dumps(metadata)
def GET(self):
    """
    Return JSON price/metadata for the ``isbn`` or ``asin`` request parameter.

    Amazon is queried for either identifier; Better World Books only for
    ISBNs. When an Open Library edition matches (or is created from the Amazon
    metadata), its ``key`` and, if set, ``ocaid`` are added to the response.

    :rtype: str
    :return: JSON-encoded dict with ``amazon`` and ``betterworldbooks`` entries
        (plus optional ``key`` / ``ocaid``), or an ``error`` entry when no
        usable identifier was supplied.
    """
    # @hornc, add: title='', asin='', authors=''
    i = web.input(isbn='', asin='')

    if not (i.isbn or i.asin):
        return simplejson.dumps({'error': 'isbn or asin required'})

    id_ = i.asin if i.asin else normalize_isbn(i.isbn)
    if not id_:
        # The supplied isbn did not normalize; stop before len() is applied
        # to a falsy value.
        return simplejson.dumps({'error': 'isbn or asin required'})

    id_type = 'asin' if i.asin else 'isbn_' + (
        '13' if len(id_) == 13 else '10')
    is_isbn = id_type.startswith('isbn_')

    metadata = {
        'amazon': get_amazon_metadata(id_) or {},
        'betterworldbooks': (
            (get_betterworldbooks_metadata(id_) or {}) if is_isbn else {}),
    }
    # if isbn_13 fails for amazon, we may want to check isbn_10 also
    # xxx

    # if bwb fails and isbn10, try again with isbn13
    if id_type == 'isbn_10' and \
            metadata['betterworldbooks'].get('price') is None:
        isbn_13 = isbn_10_to_isbn_13(id_)
        metadata['betterworldbooks'] = (
            (get_betterworldbooks_metadata(isbn_13) or {}) if isbn_13 else {})

    # fetch book by isbn if it exists
    # if asin... for now, it will fail (which is fine)
    matches = web.ctx.site.things({
        'type': '/type/edition',
        id_type: id_,
    })

    book_key = matches[0] if matches else None

    # if no OL edition for isbn, attempt to create
    if (not book_key) and metadata.get('amazon'):
        resp = load(clean_amazon_metadata_for_load(metadata.get('amazon')))
        if resp and 'edition' in resp:
            book_key = resp.get('edition').get('key')

    # include ol edition metadata in response, if available
    if book_key:
        ed = web.ctx.site.get(book_key)
        if ed:
            metadata['key'] = ed.key
            # The default keeps a missing attribute from raising AttributeError.
            if getattr(ed, 'ocaid', None):
                metadata['ocaid'] = ed.ocaid

    return simplejson.dumps(metadata)
def get_betterworldbooks_metadata(isbn: str) -> Optional[dict]:
    """
    Fetch Better World Books metadata for an ISBN; when the lookup raises, the
    exception is logged and the result of ``betterworldbooks_fmt`` is returned
    instead.

    :param str isbn: Unnormalised ISBN10 or ISBN13
    :return: Metadata for a single BWB book, currently listed on their catalog, or
             an error dict.
    :rtype: dict or None
    """
    isbn = normalize_isbn(isbn)
    try:
        metadata = _get_betterworldbooks_metadata(isbn)
    except Exception:
        # Record the traceback, then fall back to the formatter's value.
        logger.exception(f"_get_betterworldbooks_metadata({isbn})")
        metadata = betterworldbooks_fmt(isbn)
    return metadata
def get_betterworldbooks_metadata(isbn, thirdparty=False):
    """
    Fetch Better World Books metadata for an ISBN, optionally falling back to
    third party listings when the primary result has no price.

    :param str isbn: Unnormalised ISBN10 or ISBN13
    :param bool thirdparty: If no Product API match, scrape bwb website for 3rd party matches
    :return: Metadata for a single BWB book, currently listed on their catalog, or error dict.
    :rtype: dict
    """
    normalized = normalize_isbn(isbn)
    try:
        metadata = _get_betterworldbooks_metadata(normalized)
        # The price check comes first so that an unusable result raises here
        # (and is handled below) regardless of the thirdparty flag.
        wants_thirdparty = not metadata.get('price') and thirdparty
        if wants_thirdparty:
            return _get_betterworldbooks_thirdparty_metadata(normalized)
        return metadata
    except Exception:
        # Any failure is turned into the formatter's fallback value.
        return betterworldbooks_fmt(normalized)
def GET(self):
    """
    Return JSON price/metadata for the ``isbn`` or ``asin`` request parameter.

    Amazon is queried for either identifier; Better World Books only for
    ISBNs. When an Open Library edition matches (or is created from the Amazon
    metadata), its ``key`` and, if set, ``ocaid`` are added to the response.

    :rtype: str
    :return: JSON-encoded dict with ``amazon`` and ``betterworldbooks`` entries
        (plus optional ``key`` / ``ocaid``), or an ``error`` entry when no
        usable identifier was supplied.
    """
    i = web.input(isbn='', asin='')
    if not (i.isbn or i.asin):
        return json.dumps({'error': 'isbn or asin required'})

    id_ = i.asin if i.asin else normalize_isbn(i.isbn)
    if not id_:
        # The supplied isbn did not normalize; stop before len() is applied
        # to a falsy value.
        return json.dumps({'error': 'isbn or asin required'})

    id_type = 'asin' if i.asin else 'isbn_' + ('13' if len(id_) == 13 else '10')
    is_isbn = id_type.startswith('isbn_')
    # First four characters of id_type: 'asin' or 'isbn'.
    short_type = id_type[:4]

    metadata = {
        'amazon': get_amazon_metadata(id_, id_type=short_type) or {},
        'betterworldbooks': (
            (get_betterworldbooks_metadata(id_) or {}) if is_isbn else {}
        ),
    }
    # if user supplied isbn_{n} fails for amazon, we may want to check the alternate isbn

    # if bwb fails and isbn10, try again with isbn13
    if id_type == 'isbn_10' and metadata['betterworldbooks'].get('price') is None:
        isbn_13 = isbn_10_to_isbn_13(id_)
        metadata['betterworldbooks'] = (
            (get_betterworldbooks_metadata(isbn_13) or {}) if isbn_13 else {}
        )

    # fetch book by isbn if it exists
    # TODO: perform existing OL lookup by ASIN if supplied, if possible
    matches = web.ctx.site.things(
        {
            'type': '/type/edition',
            id_type: id_,
        }
    )

    book_key = matches[0] if matches else None

    # if no OL edition for isbn, attempt to create
    if (not book_key) and metadata.get('amazon'):
        book_key = create_edition_from_amazon_metadata(id_, short_type)

    # include ol edition metadata in response, if available
    if book_key:
        ed = web.ctx.site.get(book_key)
        if ed:
            metadata['key'] = ed.key
            # The default keeps a missing attribute from raising AttributeError.
            if getattr(ed, 'ocaid', None):
                metadata['ocaid'] = ed.ocaid

    return json.dumps(metadata)
def _get_amazon_metadata(id_, id_type='isbn', resources=None):
    """Fetches a single book's metadata through ``amazon_api.get_product``,
    by identifier; either 'isbn' or 'asin'.
    https://docs.aws.amazon.com/AWSECommerceService/latest/DG/ItemLookup.html

    :param str id_: The item id: isbn (10/13), or Amazon ASIN.
    :param str id_type: 'isbn' or 'asin'.
    :param resources: passed through to ``amazon_api.get_product``.
    :return: A single book item's metadata, or None.
    :rtype: dict or None
    """
    if id_type == 'isbn':
        id_ = normalize_isbn(id_)
        # 978-prefixed ISBN 13s are converted to their ISBN 10 form.
        has_isbn_10_form = len(id_) == 13 and id_.startswith('978')
        if has_isbn_10_form:
            id_ = isbn_13_to_isbn_10(id_)

    if not amazon_api:
        return None
    try:
        return amazon_api.get_product(id_, serialize=True, resources=resources)
    except Exception:
        # Any API failure is reported as "no metadata".
        return None
def _get_amazon_metadata( id_: str, id_type: str = 'isbn', resources=None, retries: int = 3, sleep_sec: float = 0.1, ) -> Optional[dict]: """Uses the Amazon Product Advertising API ItemLookup operation to locatate a specific book by identifier; either 'isbn' or 'asin'. https://docs.aws.amazon.com/AWSECommerceService/latest/DG/ItemLookup.html :param str id_: The item id: isbn (10/13), or Amazon ASIN. :param str id_type: 'isbn' or 'asin'. :param resources: Used for AWSE Commerce Service lookup -- See Amazon docs :param int retries: Number of times to query affiliate server before returning None :param float sleep_sec: Delay time.sleep(sleep_sec) seconds before each retry :return: A single book item's metadata, or None. :rtype: dict or None """ if not affiliate_server_url: return None if id_type == 'isbn': id_ = normalize_isbn(id_) if len(id_) == 13 and id_.startswith('978'): id_ = isbn_13_to_isbn_10(id_) try: r = requests.get(f'http://{affiliate_server_url}/isbn/{id_}') r.raise_for_status() if hit := r.json().get('hit'): return hit if retries <= 1: return None time.sleep(sleep_sec) # sleep before recursive call return _get_amazon_metadata(id_, id_type, resources, retries - 1, sleep_sec)
def test_normalize_isbn(isbnlike, expected):
    """Normalizing ``isbnlike`` must give exactly ``expected``."""
    normalized = normalize_isbn(isbnlike)
    assert normalized == expected
def _get_amazon_metadata(id_=None, id_type='isbn'): # TODO: extend this to work with # isbn=, asin=, title=, authors=, etc kwargs = {} if id_type == 'isbn': id_ = normalize_isbn(id_) kwargs = {'SearchIndex': 'Books', 'IdType': 'ISBN'} kwargs['ItemId'] = id_ try: if not lending.amazon_api: raise Exception product = lending.amazon_api.lookup(**kwargs) # sometimes more than one product can be returned, choose first if isinstance(product, list): product = product[0] except Exception as e: return None price_fmt, price, qlt = (None, None, None) used = product._safe_get_element_text( 'OfferSummary.LowestUsedPrice.Amount') new = product._safe_get_element_text('OfferSummary.LowestNewPrice.Amount') # prioritize lower prices and newer, all things being equal if used and new: price, qlt = (used, 'used') if int(used) < int(new) else (new, 'new') # accept whichever is available elif used or new: price, qlt = (used, 'used') if used else (new, 'new') if price: price = '{:00,.2f}'.format(int(price) / 100.) 
if qlt: price_fmt = "$%s (%s)" % (price, qlt) data = { 'url': "https://www.amazon.com/dp/%s/?tag=%s" % (id_, h.affiliate_id('amazon')), 'price': price_fmt, 'price_amt': price, 'qlt': qlt, 'title': product.title, 'authors': [{ 'name': name } for name in product.authors], 'source_records': ['amazon:%s' % product.asin], 'number_of_pages': product.pages, 'languages': list(product.languages), # needs to be normalized 'cover': product.large_image_url, 'product_group': product.product_group, } if product.publication_date: # TODO: Don't populate false month and day for older products data['publish_date'] = product.publication_date.strftime('%b %d, %Y') if product.binding: data['physical_format'] = product.binding.lower() if product.edition: data['edition'] = product.edition if product.publisher: data['publishers'] = [product.publisher] if product.isbn: isbn = product.isbn if len(isbn) == 10: data['isbn_10'] = [isbn] data['isbn_13'] = [isbn_10_to_isbn_13(isbn)] elif len(isbn) == 13: data['isbn_13'] = [isbn] if isbn.startswith('978'): data['isbn_10'] = [isbn_13_to_isbn_10(isbn)] return data
def test_normalize_isbn_returns_None():
    """None, the empty string and a single letter all normalize to None."""
    for unusable in (None, '', 'a'):
        assert normalize_isbn(unusable) is None