Exemplo n.º 1
0
def build_q_list(param):
    """Translate search parameters into a list of Solr query clauses.

    When a non-empty ``q`` is supplied it alone determines the result;
    otherwise clauses are assembled from the individual field parameters
    (``author``, ``title``, ``isbn``, ...).

    :param dict param: search parameters
    :rtype: tuple
    :return: ``(q_list, use_dismax)``; ``use_dismax`` is True only when ``q``
             ended up being treated as plain free text.
    """
    if 'q' in param:
        # Solr 4+ has support for regexes (eg `key:/foo.*/`)! But for now, let's not
        # expose that and escape all '/'. Otherwise `key:/works/OL1W` is interpreted as
        # a regex.
        free_query = param['q'].strip().replace('/', '\\/')
    else:
        free_query = None

    if free_query:
        if free_query == '*:*':
            return ([free_query], False)
        if 'NOT ' in free_query:  # this is a hack
            return ([free_query.strip()], False)
        if re_fields.search(free_query):
            clauses = []
            for part in parse_query_fields(free_query):
                if 'op' in part:
                    clauses.append(part['op'])
                else:
                    clauses.append(
                        '{}:({})'.format(part['field'], part['value']))
            return (clauses, False)
        isbn = normalize_isbn(free_query)
        if isbn and len(isbn) in (10, 13):
            return (['isbn:(%s)' % isbn], False)
        # Plain text: escape ':' and let dismax handle it
        return ([free_query.strip().replace(':', r'\:')], True)

    clauses = []
    if 'author' in param:
        author = param['author'].strip()
        key_match = re_author_key.search(author)
        if key_match:
            clauses.append("author_key:(%s)" % key_match.group(1))
        else:
            escaped_author = re_to_esc.sub(r'\\\g<0>', author)
            # Somehow the value can be empty at this point,
            #   passing the following with empty strings causes a severe error in SOLR
            if escaped_author:
                clauses.append(
                    "(author_name:({name}) OR author_alternative_name:({name}))"
                    .format(name=escaped_author))

    simple_fields = (
        'title',
        'publisher',
        'oclc',
        'lccn',
        'contributor',
        'subject',
        'place',
        'person',
        'time',
    )
    for field in simple_fields:
        if field in param:
            clauses.append(
                '{}:({})'.format(field, re_to_esc.sub(r'\\\g<0>', param[field])))

    raw_isbn = param.get('isbn')
    if raw_isbn:
        clauses.append('isbn:(%s)' % (normalize_isbn(raw_isbn) or raw_isbn))
    return (clauses, False)
Exemplo n.º 2
0
def normalize_record_isbns(rec):
    """
    Returns the Edition import record with all ISBN fields cleaned.

    Each non-empty ``isbn_13``, ``isbn_10`` and ``isbn`` field is replaced, in
    place, by the list of its normalized values; entries that
    ``normalize_isbn`` cannot normalize are dropped.

    :param dict rec: Edition import record
    :rtype: dict
    :return: A record with cleaned ISBNs in the various possible ISBN locations.
    """
    for field in ('isbn_13', 'isbn_10', 'isbn'):
        isbns = rec.get(field)
        if isbns:
            # Normalize each value once, then filter, instead of calling
            # normalize_isbn twice per entry.
            normalized = (normalize_isbn(isbn) for isbn in isbns)
            rec[field] = [isbn for isbn in normalized if isbn]
    return rec
Exemplo n.º 3
0
def build_q_list(param):
    """Translate search parameters into a list of Solr query clauses.

    A non-empty ``q`` takes precedence; otherwise clauses are built from the
    individual field parameters (``author``, ``title``, ``isbn``, ...).

    :param dict param: search parameters
    :rtype: tuple
    :return: ``(q_list, use_dismax)``; ``use_dismax`` is True only when ``q``
             is treated as plain free text.
    """
    q_list = []
    if 'q' in param:
        q_param = param['q'].strip()
    else:
        q_param = None
    use_dismax = False
    if q_param:
        if q_param == '*:*':
            q_list.append(q_param)
        elif 'NOT ' in q_param:  # this is a hack
            q_list.append(q_param.strip())
        elif re_fields.search(q_param):
            # Fielded query: emit operators verbatim, everything else as
            # `field:(value)`.
            q_list.extend(i['op'] if 'op' in i else '%s:(%s)' %
                          (i['field'], i['value'])
                          for i in parse_query_fields(q_param))
        else:
            isbn = normalize_isbn(q_param)
            if isbn and len(isbn) in (10, 13):
                q_list.append('isbn:(%s)' % isbn)
            else:
                # Plain text: escape ':' and let dismax handle it
                q_list.append(q_param.strip().replace(':', r'\:'))
                use_dismax = True
    else:
        if 'author' in param:
            v = param['author'].strip()
            m = re_author_key.search(v)
            if m:
                q_list.append("author_key:(%s)" % m.group(1))
            else:
                v = re_to_esc.sub(r'\\\g<0>', v)
                # Somehow v can be empty at this point,
                #   passing the following with empty strings causes a severe error in SOLR
                if v:
                    q_list.append(
                        "(author_name:(%(name)s) OR author_alternative_name:(%(name)s))"
                        % {'name': v})

        # 'contributor' used to be misspelled 'contribtor' here, which meant
        # a contributor filter supplied by the caller was silently ignored.
        check_params = [
            'title', 'publisher', 'oclc', 'lccn', 'contributor', 'subject',
            'place', 'person', 'time'
        ]
        q_list += [
            '%s:(%s)' % (k, re_to_esc.sub(r'\\\g<0>', param[k]))
            for k in check_params if k in param
        ]
        if param.get('isbn'):
            q_list.append('isbn:(%s)' %
                          (normalize_isbn(param['isbn']) or param['isbn']))
    return (q_list, use_dismax)
Exemplo n.º 4
0
def get_amazon_metadata(isbn):
    """Return cached Amazon metadata for an ISBN, best effort.

    :param isbn: ISBN in any form accepted by normalize_isbn
    :return: whatever cached_get_amazon_metadata returns, or None when the
             ISBN does not normalize or any error occurs
    """
    try:
        normalized = normalize_isbn(isbn)
        return cached_get_amazon_metadata(normalized) if normalized else None
    except Exception:
        # Deliberately best effort: any failure yields "no metadata".
        return None
Exemplo n.º 5
0
def get_betterworldbooks_metadata(isbn):
    """Return Better World Books metadata for an ISBN, best effort.

    :param isbn: ISBN in any form accepted by normalize_isbn
    :return: result of _get_betterworldbooks_metadata, None when the ISBN
             does not normalize, or an empty dict when the lookup raises
    """
    normalized = normalize_isbn(isbn)
    try:
        if not normalized:
            return None
        return _get_betterworldbooks_metadata(normalized)
    except Exception:
        return {}
Exemplo n.º 6
0
    def GET(self, isbn):
        """Return JSON price metadata for an ISBN.

        Collects metadata from get_amazon_metadata and
        get_betterworldbooks_metadata, then looks for a matching Open Library
        edition (attempting to create one from the Amazon metadata if none is
        found) and adds its ``key`` and, when set, ``ocaid``.

        :param str isbn: ISBN 10 or 13 as supplied by the caller
        :rtype: str
        :return: JSON-encoded dict with ``amazon`` and ``betterworldbooks``
                 entries, or a dict with an ``error`` entry when the ISBN
                 cannot be normalized.
        """
        isbn = normalize_isbn(isbn)
        if not isbn:
            # An unusable ISBN normalizes to nothing; calling len() on it
            # below would raise instead of producing a response.
            return simplejson.dumps({'error': 'invalid isbn'})
        isbn_type = 'isbn_' + ('13' if len(isbn) == 13 else '10')
        metadata = {
            'amazon': get_amazon_metadata(isbn) or {},
            'betterworldbooks': get_betterworldbooks_metadata(isbn) or {}
        }
        # if bwb fails and isbn10, try again with isbn13
        if len(isbn) == 10 and \
           metadata['betterworldbooks'].get('price') is None:
            isbn_13 = isbn_10_to_isbn_13(isbn)
            metadata['betterworldbooks'] = get_betterworldbooks_metadata(
                isbn_13) or {}

        # fetch book by isbn if it exists
        book = web.ctx.site.things({
            'type': '/type/edition',
            isbn_type: isbn,
        })

        # if no OL edition for isbn, attempt to create
        if (not book) and metadata.get('amazon'):
            book = load(clean_amazon_metadata_for_load(
                metadata.get('amazon')))

        # include ol edition metadata in response, if available
        if book:
            # NOTE(review): book[0] assumes load() returns a sequence of keys
            # like things() does -- confirm against load()'s return value.
            ed = web.ctx.site.get(book[0])
            if ed:
                metadata['key'] = ed.key
                # Default to None so an edition without an ocaid attribute
                # cannot raise AttributeError here.
                if getattr(ed, 'ocaid', None):
                    metadata['ocaid'] = ed.ocaid

        return simplejson.dumps(metadata)
Exemplo n.º 7
0
def get_amazon_metadata(isbn):
    """Fetch (cached) Amazon metadata for ``isbn``; never raises.

    :param isbn: ISBN in any form accepted by normalize_isbn
    :return: result of cached_get_amazon_metadata, or None if the ISBN is
             unusable or anything goes wrong
    """
    try:
        isbn = normalize_isbn(isbn)
        if not isbn:
            return None
        return cached_get_amazon_metadata(isbn)
    except Exception:
        # Best-effort lookup: swallow errors and report "nothing found".
        return None
Exemplo n.º 8
0
def _get_amazon_metadata(id_, id_type='isbn'):
    """Uses the Amazon Product Advertising API ItemLookup operation to locate a
    specific book by identifier; either 'isbn' or 'asin'.
    https://docs.aws.amazon.com/AWSECommerceService/latest/DG/ItemLookup.html

    :param str id_: The item id: isbn (10/13), or Amazon ASIN.
    :param str id_type: 'isbn' or 'asin'.
    :return: A single book item's metadata, or None.
    :rtype: dict or None
    :raises Exception: when ``lending.amazon_api`` is not configured.
    """
    by_isbn = id_type == 'isbn'
    if by_isbn:
        id_ = normalize_isbn(id_)
    lookup_args = {'SearchIndex': 'Books', 'IdType': 'ISBN'} if by_isbn else {}
    lookup_args.update(
        ItemId=id_,
        # Only affects Offers Response Group, does Amazon sell this directly?
        MerchantId='Amazon',
    )

    if not lending.amazon_api:
        raise Exception(
            "Open Library is not configured to access Amazon's API")
    try:
        result = lending.amazon_api.lookup(**lookup_args)
    except Exception:
        return None
    # when more than 1 product returned, choose first
    chosen = result[0] if isinstance(result, list) else result
    return _serialize_amazon_product(chosen)
Exemplo n.º 9
0
def parse_query_fields(q):
    """Split a fielded query string into field/value and operator parts.

    Yields, in order of appearance:

    * ``{'field': 'text', 'value': ...}`` for any text before the first
      field marker matched by ``re_fields`` (or for the whole query when
      there is no marker);
    * ``{'field': name, 'value': ...}`` for each field, with the name mapped
      through ``FIELD_NAME_MAP`` and ISBN values normalized when possible;
    * ``{'op': ...}`` after a field whose value ends in an operator matched
      by ``re_op``.

    ':' is backslash-escaped in every yielded value.

    :param str q: raw query string
    """
    spans = [match.span() for match in re_fields.finditer(q)]
    leading = (q[:spans[0][0]] if spans else q).strip()
    if leading:
        yield {'field': 'text', 'value': leading.replace(':', r'\:')}

    last_index = len(spans) - 1
    for index, (start, end) in enumerate(spans):
        operator = None
        # The matched span ends with the separator character; drop it.
        name = q[start:end - 1].lower()
        if name in FIELD_NAME_MAP:
            name = FIELD_NAME_MAP[name]

        if index == last_index:
            value = q[end:].strip()
        else:
            # The value runs up to the next field marker and may end with an
            # operator that belongs between the two fields.
            value = q[end:spans[index + 1][0]].strip()
            op_match = re_op.search(value)
            if op_match:
                value = value[:-len(op_match.group(0))]
                operator = op_match.group(1)

        if name == 'isbn':
            value = normalize_isbn(value) or value

        yield {'field': name, 'value': value.replace(':', r'\:')}
        if operator:
            yield {'op': operator}
0
def _get_amazon_metadata(id_, id_type='isbn', resources=None):
    """Fetch a single book's metadata from the affiliate server by identifier.

    Issues ``GET http://<affiliate_server_url>/isbn/<id_>`` and returns the
    ``hit`` entry of the JSON response.

    :param str id_: The item id: isbn (10/13), or Amazon ASIN.
    :param str id_type: 'isbn' or 'asin'.
    :param resources: Accepted for interface compatibility; not used here.
    :return: A single book item's metadata, or None (no affiliate server
             configured, unusable ISBN, empty hit, or request failure).
    :rtype: dict or None
    """
    if not affiliate_server_url:
        return None

    if id_type == 'isbn':
        id_ = normalize_isbn(id_)
        if not id_:
            # An unusable ISBN normalizes to nothing; without this guard
            # len() below raises TypeError instead of returning None.
            return None
        if len(id_) == 13 and id_.startswith('978'):
            id_ = isbn_13_to_isbn_10(id_)

    try:
        r = requests.get('http://%s/isbn/%s' % (affiliate_server_url, id_))
        r.raise_for_status()
        return r.json().get('hit') or None
    except requests.exceptions.ConnectionError:
        logger.exception("Affiliate Server unreachable")
    except requests.exceptions.HTTPError:
        logger.exception("Affiliate Server: id {} not found".format(id_))
    return None
Exemplo n.º 11
0
def get_betterworldbooks_metadata(isbn):
    """Look up Better World Books metadata for ``isbn`` without raising.

    :param isbn: ISBN in any form accepted by normalize_isbn
    :return: result of _get_betterworldbooks_metadata; None if the ISBN does
             not normalize; ``{}`` if the lookup raises
    """
    clean_isbn = normalize_isbn(isbn)
    try:
        return _get_betterworldbooks_metadata(clean_isbn) if clean_isbn else None
    except Exception:
        # Best effort: a failed lookup is reported as empty metadata.
        return {}
Exemplo n.º 12
0
    def isbn_redirect(self, isbn_param):
        """Issue a redirect to the edition matching ``isbn_param``, if any.

        Does nothing when the parameter does not normalize to an ISBN or no
        edition is found by ``Edition.from_isbn``.

        :param str isbn_param: candidate ISBN taken from the request
        """
        isbn = normalize_isbn(isbn_param)
        edition = Edition.from_isbn(isbn) if isbn else None
        if edition:
            web.seeother(edition.key)
Exemplo n.º 13
0
 def isbn_redirect(self, isbn_param):
     """Redirect to the first edition whose ISBN-10 or ISBN-13 matches.

     Returns without redirecting when ``isbn_param`` does not normalize to
     an ISBN or when no edition matches either form.

     :param str isbn_param: candidate ISBN taken from the request
     """
     isbn = normalize_isbn(isbn_param)
     if not isbn:
         return
     matches = []
     for digits in (10, 13):
         # Query each field with the ISBN form of the matching length.
         candidate = isbn if len(isbn) == digits else opposite_isbn(isbn)
         query = {'type': '/type/edition', 'isbn_%d' % digits: candidate}
         matches.extend(web.ctx.site.things(query))
     if matches:
         raise web.seeother(matches[0])
Exemplo n.º 14
0
def get_betterworldbooks_metadata(isbn):
    """
    Look up a book on Better World Books, falling back to
    ``betterworldbooks_fmt(isbn)`` when the lookup raises.

    :param str isbn: Unnormalized ISBN10 or ISBN13
    :return: Metadata for a single BWB book, currently listed on their catalog, or error dict.
    :rtype: dict
    """
    normalized = normalize_isbn(isbn)
    try:
        result = _get_betterworldbooks_metadata(normalized)
    except Exception:
        result = betterworldbooks_fmt(normalized)
    return result
Exemplo n.º 15
0
def _get_amazon_metadata(isbn=None):
    """Look up a book by ISBN through ``lending.amazon_api`` and flatten it.

    :param str isbn: ISBN 10 or 13, in any form accepted by normalize_isbn
    :return: dict of price and bibliographic fields, or None when the ISBN
             is unusable, the API is not configured, or the lookup fails
    :rtype: dict or None
    """
    # XXX @hornc, you should be extending this to work with
    # isbn=, asin=, title=, authors=, etc
    isbn = normalize_isbn(isbn)
    if not isbn:
        # Nothing to look up; also keeps len(isbn) below from raising.
        return None
    try:
        if not lending.amazon_api:
            raise Exception
        product = lending.amazon_api.lookup(
            ItemId=isbn, IdType="ISBN", SearchIndex="Books")
    except Exception:
        return None

    price_fmt, price, qlt = (None, None, None)
    used = product._safe_get_element_text('OfferSummary.LowestUsedPrice.Amount')
    new = product._safe_get_element_text('OfferSummary.LowestNewPrice.Amount')

    # prioritize lower prices and newer, all things being equal
    if used and new:
        price, qlt = (used, 'used') if int(used) < int(new) else (new, 'new')
    # accept whichever is available
    elif used or new:
        price, qlt = (used, 'used') if used else (new, 'new')

    if price:
        # The amount is divided by 100 and shown with two decimals.
        price = '{:00,.2f}'.format(int(price)/100.)
        if qlt:
            price_fmt = "$%s (%s)" % (price, qlt)

    data = {
        'url': "https://www.amazon.com/dp/%s/?tag=%s" % (
            isbn, h.affiliate_id('amazon')),
        'price': price_fmt,
        'price_amt': price,
        'qlt': qlt,
        'title': product.title,
        'authors': [{'name': name} for name in product.authors],
        'source_records': ['amazon:%s' % product.asin],
        'number_of_pages': product.pages,
        'languages': list(product.languages),  # needs to be normalized
        'cover': product.large_image_url,
    }
    if product.publication_date:
        # Only format the date when there is one; calling strftime on a
        # missing publication date would raise AttributeError.
        data['publish_date'] = product.publication_date.strftime('%b %d, %Y')
    if product.publisher:
        data['publishers'] = [product.publisher]
    if len(isbn) == 10:
        data['isbn_10'] = [isbn]
        data['isbn_13'] = [isbn_10_to_isbn_13(isbn)]
    if len(isbn) == 13:
        data['isbn_13'] = [isbn]
        # Only the '978' prefix is converted back to an ISBN-10 here.
        if isbn.startswith('978'):
            data['isbn_10'] = [isbn_13_to_isbn_10(isbn)]
    return data
Exemplo n.º 16
0
    def GET(self):
        """Return JSON price metadata for the ``isbn`` or ``asin`` request input.

        Collects Amazon and (for ISBNs) Better World Books metadata, then
        looks for a matching Open Library edition, attempting to create one
        from the Amazon metadata when none exists, and adds its ``key`` and,
        when set, ``ocaid``.

        :rtype: str
        :return: JSON-encoded metadata dict, or a dict with an ``error``
                 entry when no usable identifier was supplied.
        """
        # @hornc, add: title='', asin='', authors=''
        i = web.input(isbn='', asin='')

        if not (i.isbn or i.asin):
            return simplejson.dumps({
                'error': 'isbn or asin required'
            })

        id_ = i.asin if i.asin else normalize_isbn(i.isbn)
        if not id_:
            # The supplied isbn did not normalize to anything; len(id_)
            # below would raise TypeError instead of producing a response.
            return simplejson.dumps({
                'error': 'invalid isbn'
            })
        id_type = 'asin' if i.asin else 'isbn_' + ('13' if len(id_) == 13 else '10')

        metadata = {
            'amazon': get_amazon_metadata(id_) or {},
            # `or {}` keeps the .get('price') check below safe if the
            # lookup returns nothing.
            'betterworldbooks': (get_betterworldbooks_metadata(id_) or {}) if id_type.startswith('isbn_') else {}
        }
        # if isbn_13 fails for amazon, we may want to check isbn_10 also
        # xxx

        # if bwb fails and isbn10, try again with isbn13
        if id_type == 'isbn_10' and \
           metadata['betterworldbooks'].get('price') is None:
            isbn_13 = isbn_10_to_isbn_13(id_)
            metadata['betterworldbooks'] = isbn_13 and get_betterworldbooks_metadata(
                isbn_13) or {}

        # fetch book by isbn if it exists
        # if asin... for now, it will fail (which is fine)
        matches = web.ctx.site.things({
            'type': '/type/edition',
            id_type: id_,
        })

        book_key = matches[0] if matches else None

        # if no OL edition for isbn, attempt to create
        if (not book_key) and metadata.get('amazon'):
            resp = load(clean_amazon_metadata_for_load(
                metadata.get('amazon')))
            if resp and 'edition' in resp:
                book_key = resp.get('edition').get('key')

        # include ol edition metadata in response, if available
        if book_key:
            ed = web.ctx.site.get(book_key)
            if ed:
                metadata['key'] = ed.key
                # Default to None so an edition without an ocaid attribute
                # cannot raise AttributeError here.
                if getattr(ed, 'ocaid', None):
                    metadata['ocaid'] = ed.ocaid

        return simplejson.dumps(metadata)
Exemplo n.º 17
0
    def GET(self):
        """Return JSON price metadata for the ``isbn`` or ``asin`` request input.

        Gathers Amazon and (for ISBNs) Better World Books metadata, then
        attaches the matching Open Library edition's ``key`` and, when set,
        ``ocaid``; if no edition matches, an attempt is made to create one
        from the Amazon metadata.

        :rtype: str
        :return: JSON-encoded metadata dict, or a dict with an ``error``
                 entry when no usable identifier was supplied.
        """
        # @hornc, add: title='', asin='', authors=''
        i = web.input(isbn='', asin='')

        if not (i.isbn or i.asin):
            return simplejson.dumps({'error': 'isbn or asin required'})

        id_ = i.asin if i.asin else normalize_isbn(i.isbn)
        if not id_:
            # The supplied isbn did not normalize to anything; len(id_)
            # below would raise TypeError instead of producing a response.
            return simplejson.dumps({'error': 'invalid isbn'})
        id_type = 'asin' if i.asin else 'isbn_' + (
            '13' if len(id_) == 13 else '10')

        metadata = {
            'amazon': get_amazon_metadata(id_) or {},
            # `or {}` keeps the .get('price') check below safe if the
            # lookup returns nothing.
            'betterworldbooks': (get_betterworldbooks_metadata(id_) or {})
            if id_type.startswith('isbn_') else {}
        }
        # if isbn_13 fails for amazon, we may want to check isbn_10 also
        # xxx

        # if bwb fails and isbn10, try again with isbn13
        if id_type == 'isbn_10' and \
           metadata['betterworldbooks'].get('price') is None:
            isbn_13 = isbn_10_to_isbn_13(id_)
            metadata[
                'betterworldbooks'] = isbn_13 and get_betterworldbooks_metadata(
                    isbn_13) or {}

        # fetch book by isbn if it exists
        # if asin... for now, it will fail (which is fine)
        matches = web.ctx.site.things({
            'type': '/type/edition',
            id_type: id_,
        })

        book_key = matches[0] if matches else None

        # if no OL edition for isbn, attempt to create
        if (not book_key) and metadata.get('amazon'):
            resp = load(clean_amazon_metadata_for_load(metadata.get('amazon')))
            if resp and 'edition' in resp:
                book_key = resp.get('edition').get('key')

        # include ol edition metadata in response, if available
        if book_key:
            ed = web.ctx.site.get(book_key)
            if ed:
                metadata['key'] = ed.key
                # Default to None so an edition without an ocaid attribute
                # cannot raise AttributeError here.
                if getattr(ed, 'ocaid', None):
                    metadata['ocaid'] = ed.ocaid

        return simplejson.dumps(metadata)
Exemplo n.º 18
0
def get_betterworldbooks_metadata(isbn: str) -> Optional[dict]:
    """
    Look up a book on Better World Books; when the lookup raises, the error
    is logged and ``betterworldbooks_fmt(isbn)`` is returned instead.

    :param str isbn: Unnormalized ISBN10 or ISBN13
    :return: Metadata for a single BWB book, currently listed on their catalog, or
             an error dict.
    :rtype: dict or None
    """
    isbn = normalize_isbn(isbn)
    try:
        metadata = _get_betterworldbooks_metadata(isbn)
    except Exception:
        logger.exception(f"_get_betterworldbooks_metadata({isbn})")
        metadata = betterworldbooks_fmt(isbn)
    return metadata
Exemplo n.º 19
0
def get_betterworldbooks_metadata(isbn, thirdparty=False):
    """
    Look up a book on Better World Books, optionally falling back to the
    third-party lookup when the primary result carries no price.

    :param str isbn: Unnormalized ISBN10 or ISBN13
    :param bool thirdparty: If no Product API match, scrape bwb website for 3rd party matches
    :return: Metadata for a single BWB book, currently listed on their catalog, or error dict.
    :rtype: dict
    """
    normalized = normalize_isbn(isbn)
    try:
        primary = _get_betterworldbooks_metadata(normalized)
        # A priced result, or a caller who did not ask for the third-party
        # fallback, gets the primary metadata unchanged.
        if primary.get('price') or not thirdparty:
            return primary
        return _get_betterworldbooks_thirdparty_metadata(normalized)
    except Exception:
        return betterworldbooks_fmt(normalized)
Exemplo n.º 20
0
    def GET(self):
        """Return JSON price metadata for the ``isbn`` or ``asin`` request input.

        Gathers Amazon and (for ISBNs) Better World Books metadata, then
        attaches the matching Open Library edition's ``key`` and, when set,
        ``ocaid``; if no edition matches, an attempt is made to create one
        via create_edition_from_amazon_metadata.

        :rtype: str
        :return: JSON-encoded metadata dict, or a dict with an ``error``
                 entry when no usable identifier was supplied.
        """
        i = web.input(isbn='', asin='')
        if not (i.isbn or i.asin):
            return json.dumps({'error': 'isbn or asin required'})
        id_ = i.asin if i.asin else normalize_isbn(i.isbn)
        if not id_:
            # The supplied isbn did not normalize to anything; len(id_)
            # below would raise TypeError instead of producing a response.
            return json.dumps({'error': 'invalid isbn'})
        id_type = 'asin' if i.asin else 'isbn_' + ('13' if len(id_) == 13 else '10')

        metadata = {
            # id_type[:4] is 'asin' or 'isbn'
            'amazon': get_amazon_metadata(id_, id_type=id_type[:4]) or {},
            # `or {}` keeps the .get('price') check below safe if the
            # lookup returns nothing.
            'betterworldbooks': (get_betterworldbooks_metadata(id_) or {})
            if id_type.startswith('isbn_')
            else {},
        }
        # if user supplied isbn_{n} fails for amazon, we may want to check the alternate isbn

        # if bwb fails and isbn10, try again with isbn13
        if id_type == 'isbn_10' and metadata['betterworldbooks'].get('price') is None:
            isbn_13 = isbn_10_to_isbn_13(id_)
            metadata['betterworldbooks'] = (
                isbn_13 and get_betterworldbooks_metadata(isbn_13) or {}
            )

        # fetch book by isbn if it exists
        # TODO: perform existing OL lookup by ASIN if supplied, if possible
        matches = web.ctx.site.things(
            {
                'type': '/type/edition',
                id_type: id_,
            }
        )

        book_key = matches[0] if matches else None

        # if no OL edition for isbn, attempt to create
        if (not book_key) and metadata.get('amazon'):
            book_key = create_edition_from_amazon_metadata(id_, id_type[:4])

        # include ol edition metadata in response, if available
        if book_key:
            ed = web.ctx.site.get(book_key)
            if ed:
                metadata['key'] = ed.key
                # Default to None so an edition without an ocaid attribute
                # cannot raise AttributeError here.
                if getattr(ed, 'ocaid', None):
                    metadata['ocaid'] = ed.ocaid

        return json.dumps(metadata)
Exemplo n.º 21
0
def _get_amazon_metadata(id_, id_type='isbn', resources=None):
    """Fetch a single book's metadata through ``amazon_api.get_product``,
    looked up by identifier; either 'isbn' or 'asin'.
    https://docs.aws.amazon.com/AWSECommerceService/latest/DG/ItemLookup.html

    :param str id_: The item id: isbn (10/13), or Amazon ASIN.
    :param str id_type: 'isbn' or 'asin'.
    :param resources: Passed through to ``amazon_api.get_product``.
    :return: A single book item's metadata, or None (unusable ISBN, no
             ``amazon_api`` configured, or the lookup raised).
    :rtype: dict or None
    """
    if id_type == 'isbn':
        id_ = normalize_isbn(id_)
        if not id_:
            # An unusable ISBN normalizes to nothing; without this guard
            # len() below raises TypeError instead of returning None.
            return None
        # '978'-prefixed ISBN-13s are looked up by their ISBN-10 form
        if len(id_) == 13 and id_.startswith('978'):
            id_ = isbn_13_to_isbn_10(id_)

    if amazon_api:
        try:
            return amazon_api.get_product(id_, serialize=True, resources=resources)
        except Exception:
            return None
    return None
Exemplo n.º 22
0
def _get_amazon_metadata(
    id_: str,
    id_type: str = 'isbn',
    resources=None,
    retries: int = 3,
    sleep_sec: float = 0.1,
) -> Optional[dict]:
    """Fetch a single book's metadata from the affiliate server by identifier,
    retrying while the response carries no ``hit``.

    Issues ``GET http://<affiliate_server_url>/isbn/<id_>``.
    Background on the underlying Amazon ItemLookup operation:
    https://docs.aws.amazon.com/AWSECommerceService/latest/DG/ItemLookup.html

    :param str id_: The item id: isbn (10/13), or Amazon ASIN.
    :param str id_type: 'isbn' or 'asin'.
    :param resources: Used for AWSE Commerce Service lookup -- See Amazon docs.
                      In the visible code it is only forwarded to the retry call.
    :param int retries: Number of times to query affiliate server before returning None
    :param float sleep_sec: Delay time.sleep(sleep_sec) seconds before each retry
    :return: A single book item's metadata, or None.
    :rtype: dict or None
    """
    # Without a configured affiliate server there is nothing to query.
    if not affiliate_server_url:
        return None

    if id_type == 'isbn':
        id_ = normalize_isbn(id_)
        # '978'-prefixed ISBN-13s are looked up by their ISBN-10 form.
        if len(id_) == 13 and id_.startswith('978'):
            id_ = isbn_13_to_isbn_10(id_)

    try:
        r = requests.get(f'http://{affiliate_server_url}/isbn/{id_}')
        r.raise_for_status()
        # A truthy 'hit' in the JSON body is the result.
        if hit := r.json().get('hit'):
            return hit
        # No hit: give up once the retry budget is exhausted.
        if retries <= 1:
            return None
        time.sleep(sleep_sec)  # sleep before recursive call
        return _get_amazon_metadata(id_, id_type, resources, retries - 1,
                                    sleep_sec)
    # NOTE(review): no except/finally clause for this try is visible in this
    # excerpt -- the block appears truncated; confirm against the full file.
Exemplo n.º 23
0
def test_normalize_isbn(isbnlike, expected):
    """normalize_isbn maps each ISBN-like input to its expected value."""
    actual = normalize_isbn(isbnlike)
    assert actual == expected
Exemplo n.º 24
0
def _get_amazon_metadata(id_=None, id_type='isbn'):
    """Look up one product through ``lending.amazon_api`` and flatten it to a dict.

    :param str id_: item identifier; normalized first when ``id_type`` is 'isbn'
    :param str id_type: 'isbn' restricts the lookup to the Books index by
                        ISBN; any other value looks up ``id_`` as given
    :return: dict of price and bibliographic fields, or None when the API is
             not configured or the lookup fails
    :rtype: dict or None
    """
    # TODO: extend this to work with
    # isbn=, asin=, title=, authors=, etc
    lookup_args = {}
    if id_type == 'isbn':
        id_ = normalize_isbn(id_)
        lookup_args.update(SearchIndex='Books', IdType='ISBN')
    lookup_args['ItemId'] = id_
    try:
        if not lending.amazon_api:
            return None
        product = lending.amazon_api.lookup(**lookup_args)
        # sometimes more than one product can be returned, choose first
        if isinstance(product, list):
            product = product[0]
    except Exception:
        return None

    used = product._safe_get_element_text(
        'OfferSummary.LowestUsedPrice.Amount')
    new = product._safe_get_element_text('OfferSummary.LowestNewPrice.Amount')

    if used and new:
        # prioritize lower prices and newer, all things being equal
        price, qlt = (used, 'used') if int(used) < int(new) else (new, 'new')
    elif used:
        # accept whichever is available
        price, qlt = used, 'used'
    elif new:
        price, qlt = new, 'new'
    else:
        price, qlt = None, None

    price_fmt = None
    if price:
        # The amount is divided by 100 and shown with two decimals.
        price = '{:00,.2f}'.format(int(price) / 100.)
        if qlt:
            price_fmt = "$%s (%s)" % (price, qlt)

    data = {
        'url': "https://www.amazon.com/dp/%s/?tag=%s" % (
            id_, h.affiliate_id('amazon')),
        'price': price_fmt,
        'price_amt': price,
        'qlt': qlt,
        'title': product.title,
        'authors': [{'name': name} for name in product.authors],
        'source_records': ['amazon:%s' % product.asin],
        'number_of_pages': product.pages,
        'languages': list(product.languages),  # needs to be normalized
        'cover': product.large_image_url,
        'product_group': product.product_group,
    }

    # Optional fields are only added when the product provides them.
    publication_date = product.publication_date
    if publication_date:
        # TODO: Don't populate false month and day for older products
        data['publish_date'] = publication_date.strftime('%b %d, %Y')
    binding = product.binding
    if binding:
        data['physical_format'] = binding.lower()
    edition = product.edition
    if edition:
        data['edition'] = edition
    publisher = product.publisher
    if publisher:
        data['publishers'] = [publisher]

    isbn = product.isbn
    if isbn:
        if len(isbn) == 10:
            data['isbn_10'] = [isbn]
            data['isbn_13'] = [isbn_10_to_isbn_13(isbn)]
        elif len(isbn) == 13:
            data['isbn_13'] = [isbn]
            # Only the '978' prefix is converted back to an ISBN-10 here.
            if isbn.startswith('978'):
                data['isbn_10'] = [isbn_13_to_isbn_10(isbn)]
    return data
Exemplo n.º 25
0
def test_normalize_isbn_returns_None():
    """Inputs that cannot be an ISBN normalize to None."""
    for unusable in (None, '', 'a'):
        assert normalize_isbn(unusable) is None