Exemplo n.º 1
0
    def GET(self, isbn):
        """Return a JSON document of vendor metadata for ``isbn``.

        Queries the Amazon and BetterWorldBooks helpers, looks for an
        existing edition with that ISBN on this site (attempting to create
        one from the Amazon record when none is found) and, when an edition
        can be loaded, adds its ``key`` and ``ocaid`` to the response.

        :param str isbn: ISBN to look up; normalized before use
        :return: JSON-encoded dict with ``amazon`` and ``betterworldbooks``
            entries and, when available, ``key`` and ``ocaid``
        :rtype: str
        """
        isbn = normalize_isbn(isbn)
        isbn_type = 'isbn_' + ('13' if len(isbn) == 13 else '10')
        metadata = {
            'amazon': get_amazon_metadata(isbn) or {},
            'betterworldbooks': get_betterworldbooks_metadata(isbn) or {},
        }
        # if bwb fails and isbn10, try again with isbn13
        if len(isbn) == 10 and \
           metadata['betterworldbooks'].get('price') is None:
            isbn_13 = isbn_10_to_isbn_13(isbn)
            # the conversion can yield nothing; do not query BWB with it
            metadata['betterworldbooks'] = (
                get_betterworldbooks_metadata(isbn_13) if isbn_13 else None
            ) or {}

        # fetch book by isbn if it exists
        matches = web.ctx.site.things({
            'type': '/type/edition',
            isbn_type: isbn,
        })
        book_key = matches[0] if matches else None

        # if no OL edition for isbn, attempt to create
        if not book_key and metadata['amazon']:
            resp = load(clean_amazon_metadata_for_load(metadata['amazon']))
            # NOTE(review): load() is assumed to answer with a mapping that
            # carries the edition under 'edition' -- confirm against load().
            # A plain sequence of keys is still accepted for compatibility.
            if isinstance(resp, dict):
                book_key = (resp.get('edition') or {}).get('key')
            elif resp:
                book_key = resp[0]

        # include ol edition metadata in response, if available
        if book_key:
            ed = web.ctx.site.get(book_key)
            if ed:
                metadata['key'] = ed.key
                # default keeps a record without an ocaid from raising
                ocaid = getattr(ed, 'ocaid', None)
                if ocaid:
                    metadata['ocaid'] = ocaid

        return simplejson.dumps(metadata)
Exemplo n.º 2
0
 def get_isbn13(self):
     """Return a canonical isbn_13 for this record.

     The first ``isbn_13`` value is preferred (passed through
     ``canonical``). When that yields nothing, the first ``isbn_10`` value
     is converted instead. If neither is usable, the falsy value that was
     found is returned unchanged.
     """
     thirteen = self.isbn_13
     if thirteen:
         thirteen = canonical(thirteen[0])
     if thirteen:
         return thirteen

     # fall back to the 10-digit form
     ten = self.isbn_10
     if ten:
         ten = ten[0]
     if not ten:
         return ten
     return isbn_10_to_isbn_13(ten)
Exemplo n.º 3
0
def _get_amazon_metadata(isbn):
    """Look ``isbn`` up through the Amazon API client and build metadata.

    :param str isbn: 10- or 13-character ISBN
    :return: dict of url, price, title, author, publisher and ISBN data for
        the product, or None when no API client is configured or the
        lookup raises
    :rtype: dict or None
    """
    # XXX some ISBNs may be < 10!
    isbn_10 = isbn if len(isbn) == 10 else isbn_13_to_isbn_10(isbn)
    isbn_13 = isbn if len(isbn) == 13 else isbn_10_to_isbn_13(isbn)
    try:
        if not lending.amazon_api:
            return None
        product = lending.amazon_api.lookup(ItemId=isbn_10)
    except Exception:
        # best effort: any lookup failure means "no metadata"
        return None

    price_fmt, price, qlt = (None, None, None)
    used = product._safe_get_element_text(
        'OfferSummary.LowestUsedPrice.Amount')
    new = product._safe_get_element_text('OfferSummary.LowestNewPrice.Amount')

    # prioritize lower prices and newer, all things being equal
    if used and new:
        price, qlt = (used, 'used') if int(used) < int(new) else (new, 'new')
    # accept whichever is available
    elif used or new:
        price, qlt = (used, 'used') if used else (new, 'new')

    if price:
        # the amount is divided by 100 to format it with two decimals
        price = '{:00,.2f}'.format(int(price) / 100.)
        if qlt:
            price_fmt = "$%s (%s)" % (price, qlt)

    # a product may come without a publication date or publisher
    published = product.publication_date
    publisher = product.publisher

    return {
        'url': "https://www.amazon.com/dp/%s/?tag=%s" % (
            isbn, h.affiliate_id('amazon')),
        'price': price_fmt,
        'price_amt': price,
        'qlt': qlt,
        'title': product.title,
        'authors': [{'name': name} for name in product.authors],
        'publish_date': published.strftime('%b %d, %Y') if published else None,
        'source_records': ['amazon:%s' % product.asin],
        'number_of_pages': product.pages,
        'languages': list(product.languages),  # needs to be normalized
        # never emit [None] placeholders for missing values
        'publishers': [publisher] if publisher else [],
        'cover': product.large_image_url,
        'isbn_10': [isbn_10] if isbn_10 else [],
        'isbn_13': [isbn_13] if isbn_13 else [],
    }
Exemplo n.º 4
0
def _get_amazon_metadata(isbn=None):
    """Look ``isbn`` up in Amazon's Books index and build metadata.

    :param str isbn: ISBN to look up; normalized before use
    :return: dict of url, price, title, author, publisher and ISBN data for
        the product, or None when no API client is configured or the
        lookup raises
    :rtype: dict or None
    """
    # XXX @hornc, you should be extending this to work with
    # isbn=, asin=, title=, authors=, etc
    isbn = normalize_isbn(isbn)
    try:
        if not lending.amazon_api:
            return None
        product = lending.amazon_api.lookup(
            ItemId=isbn, IdType="ISBN", SearchIndex="Books")
        # sometimes more than one product can be returned, choose first
        if isinstance(product, list):
            product = product[0]
    except Exception:
        # best effort: any lookup failure means "no metadata"
        return None

    price_fmt, price, qlt = (None, None, None)
    used = product._safe_get_element_text('OfferSummary.LowestUsedPrice.Amount')
    new = product._safe_get_element_text('OfferSummary.LowestNewPrice.Amount')

    # prioritize lower prices and newer, all things being equal
    if used and new:
        price, qlt = (used, 'used') if int(used) < int(new) else (new, 'new')
    # accept whichever is available
    elif used or new:
        price, qlt = (used, 'used') if used else (new, 'new')

    if price:
        # the amount is divided by 100 to format it with two decimals
        price = '{:00,.2f}'.format(int(price)/100.)
        if qlt:
            price_fmt = "$%s (%s)" % (price, qlt)

    data = {
        'url': "https://www.amazon.com/dp/%s/?tag=%s" % (
            isbn, h.affiliate_id('amazon')),
        'price': price_fmt,
        'price_amt': price,
        'qlt': qlt,
        'title': product.title,
        'authors': [{'name': name} for name in product.authors],
        'source_records': ['amazon:%s' % product.asin],
        'number_of_pages': product.pages,
        'languages': list(product.languages),  # needs to be normalized
        'cover': product.large_image_url,
    }
    # optional fields are only included when the product provides them
    if product.publication_date:
        data['publish_date'] = product.publication_date.strftime('%b %d, %Y')
    if product.publisher:
        data['publishers'] = [product.publisher]
    if len(isbn) == 10:
        data['isbn_10'] = [isbn]
        isbn_13 = isbn_10_to_isbn_13(isbn)
        if isbn_13:
            data['isbn_13'] = [isbn_13]
    elif len(isbn) == 13:
        data['isbn_13'] = [isbn]
        # only 978-prefixed ISBN-13s are converted to a 10-digit form here
        if isbn.startswith('978'):
            isbn_10 = isbn_13_to_isbn_10(isbn)
            if isbn_10:
                data['isbn_10'] = [isbn_10]
    return data
Exemplo n.º 5
0
    def GET(self):
        """Return JSON vendor metadata for the ``isbn`` or ``asin`` input.

        Reads ``isbn`` and ``asin`` from the request (``asin`` wins when
        both are given), queries the Amazon helper and, for ISBNs, the
        BetterWorldBooks helper, then tries to find or create a matching
        edition so that its ``key`` and ``ocaid`` can be added.

        :return: JSON-encoded metadata dict, or ``{"error": ...}`` when
            neither identifier was supplied
        :rtype: str
        """
        # @hornc, add: title='', asin='', authors=''
        i = web.input(isbn='', asin='')

        if not (i.isbn or i.asin):
            return simplejson.dumps({
                'error': 'isbn or asin required'
            })

        id_ = i.asin if i.asin else normalize_isbn(i.isbn)
        id_type = 'asin' if i.asin else 'isbn_' + ('13' if len(id_) == 13 else '10')
        is_isbn = id_type.startswith('isbn_')

        metadata = {
            'amazon': get_amazon_metadata(id_) or {},
            # always leave a dict here: .get('price') is called on it below
            'betterworldbooks': (
                get_betterworldbooks_metadata(id_) if is_isbn else None
            ) or {},
        }
        # if isbn_13 fails for amazon, we may want to check isbn_10 also
        # xxx

        # if bwb fails and isbn10, try again with isbn13
        if id_type == 'isbn_10' and \
           metadata['betterworldbooks'].get('price') is None:
            isbn_13 = isbn_10_to_isbn_13(id_)
            metadata['betterworldbooks'] = (
                get_betterworldbooks_metadata(isbn_13) if isbn_13 else None
            ) or {}

        # fetch book by isbn if it exists
        # if asin... for now, it will fail (which is fine)
        matches = web.ctx.site.things({
            'type': '/type/edition',
            id_type: id_,
        })

        book_key = matches[0] if matches else None

        # if no OL edition for isbn, attempt to create
        if (not book_key) and metadata['amazon']:
            resp = load(clean_amazon_metadata_for_load(metadata['amazon']))
            if resp and 'edition' in resp:
                # tolerate an empty 'edition' entry
                book_key = (resp.get('edition') or {}).get('key')

        # include ol edition metadata in response, if available
        if book_key:
            ed = web.ctx.site.get(book_key)
            if ed:
                metadata['key'] = ed.key
                # default keeps a record without an ocaid from raising
                ocaid = getattr(ed, 'ocaid', None)
                if ocaid:
                    metadata['ocaid'] = ocaid

        return simplejson.dumps(metadata)
Exemplo n.º 6
0
    def GET(self):
        """Serve vendor metadata for a requested ``isbn`` or ``asin`` as JSON.

        ``asin`` takes precedence over ``isbn`` when both are supplied.
        Amazon is queried for either identifier; BetterWorldBooks only for
        ISBNs (with an ISBN-13 retry when an ISBN-10 lookup has no price).
        A matching edition is then looked up, or created from the Amazon
        record, and its ``key`` / ``ocaid`` are added when available.

        :return: JSON-encoded metadata dict, or ``{"error": ...}`` when
            neither identifier was supplied
        :rtype: str
        """
        # @hornc, add: title='', asin='', authors=''
        i = web.input(isbn='', asin='')

        if not (i.isbn or i.asin):
            return simplejson.dumps({'error': 'isbn or asin required'})

        id_ = i.asin if i.asin else normalize_isbn(i.isbn)
        id_type = 'asin' if i.asin else 'isbn_' + (
            '13' if len(id_) == 13 else '10')

        if id_type.startswith('isbn_'):
            # coerce a failed lookup to {} so .get('price') below is safe
            bwb = get_betterworldbooks_metadata(id_) or {}
        else:
            bwb = {}

        metadata = {
            'amazon': get_amazon_metadata(id_) or {},
            'betterworldbooks': bwb,
        }
        # if isbn_13 fails for amazon, we may want to check isbn_10 also
        # xxx

        # if bwb fails and isbn10, try again with isbn13
        if id_type == 'isbn_10' and bwb.get('price') is None:
            isbn_13 = isbn_10_to_isbn_13(id_)
            if isbn_13:
                metadata['betterworldbooks'] = (
                    get_betterworldbooks_metadata(isbn_13) or {})
            else:
                metadata['betterworldbooks'] = {}

        # fetch book by isbn if it exists
        # if asin... for now, it will fail (which is fine)
        matches = web.ctx.site.things({
            'type': '/type/edition',
            id_type: id_,
        })

        book_key = matches[0] if matches else None

        # if no OL edition for isbn, attempt to create
        if (not book_key) and metadata['amazon']:
            resp = load(clean_amazon_metadata_for_load(metadata['amazon']))
            if resp and 'edition' in resp:
                # tolerate an empty 'edition' entry
                book_key = (resp.get('edition') or {}).get('key')

        # include ol edition metadata in response, if available
        if book_key:
            ed = web.ctx.site.get(book_key)
            if ed:
                metadata['key'] = ed.key
                # default keeps a record without an ocaid from raising
                ocaid = getattr(ed, 'ocaid', None)
                if ocaid:
                    metadata['ocaid'] = ocaid

        return simplejson.dumps(metadata)
Exemplo n.º 7
0
    def GET(self):
        """Serve vendor metadata for a requested ``isbn`` or ``asin`` as JSON.

        ``asin`` takes precedence over ``isbn`` when both are supplied.
        Amazon is queried with the identifier and its kind (``'isbn'`` or
        ``'asin'``); BetterWorldBooks only for ISBNs. When no edition
        matches, one is created from the Amazon metadata, and the edition's
        ``key`` / ``ocaid`` are added to the response when available.

        :return: JSON-encoded metadata dict, or ``{"error": ...}`` when
            neither identifier was supplied
        :rtype: str
        """
        i = web.input(isbn='', asin='')
        if not (i.isbn or i.asin):
            return json.dumps({'error': 'isbn or asin required'})
        id_ = i.asin if i.asin else normalize_isbn(i.isbn)
        id_type = 'asin' if i.asin else 'isbn_' + ('13' if len(id_) == 13 else '10')
        # 'isbn_10' / 'isbn_13' / 'asin' all reduce to 'isbn' or 'asin'
        short_type = id_type[:4]

        metadata = {
            'amazon': get_amazon_metadata(id_, id_type=short_type) or {},
            # always leave a dict here: .get('price') is called on it below
            'betterworldbooks': (
                get_betterworldbooks_metadata(id_)
                if id_type.startswith('isbn_')
                else None
            )
            or {},
        }
        # if user supplied isbn_{n} fails for amazon, we may want to check the alternate isbn

        # if bwb fails and isbn10, try again with isbn13
        if id_type == 'isbn_10' and metadata['betterworldbooks'].get('price') is None:
            isbn_13 = isbn_10_to_isbn_13(id_)
            metadata['betterworldbooks'] = (
                get_betterworldbooks_metadata(isbn_13) if isbn_13 else None
            ) or {}

        # fetch book by isbn if it exists
        # TODO: perform existing OL lookup by ASIN if supplied, if possible
        matches = web.ctx.site.things(
            {
                'type': '/type/edition',
                id_type: id_,
            }
        )

        book_key = matches[0] if matches else None

        # if no OL edition for isbn, attempt to create
        if (not book_key) and metadata['amazon']:
            book_key = create_edition_from_amazon_metadata(id_, short_type)

        # include ol edition metadata in response, if available
        if book_key:
            ed = web.ctx.site.get(book_key)
            if ed:
                metadata['key'] = ed.key
                # default keeps a record without an ocaid from raising
                ocaid = getattr(ed, 'ocaid', None)
                if ocaid:
                    metadata['ocaid'] = ocaid

        return json.dumps(metadata)
Exemplo n.º 8
0
    def serialize(product):
        """Takes a full Amazon product Advertising API returned AmazonProduct
        with multiple ResponseGroups, and extracts the data we are
        interested in.

        Any part of the product that is absent is reported as a falsy value
        (usually None) instead of raising.

        :param AmazonAPI product:
        :return: Amazon metadata for one product, ``{}`` for a falsy product
        :rtype: dict

        {
          'url': 'https://www.amazon.com/dp/1628603976/?tag=internetarchi-20',
          'source_records': ['amazon:1628603976'],
          'isbn_10': ['1628603976'],
          'isbn_13': ['9781628603972'],
          'price': '$54.06',
          'price_amt': 5406,
          'title': 'Boundless: Upgrade Your Brain, Optimize Your Body & Defy Aging',
          'cover': 'https://m.media-amazon.com/images/I/51IT9MV3KqL._AC_.jpg',
          'authors': [{'name': 'Guterson, David'}],
          'publishers': ['Victory Belt Publishing'],
          'number_of_pages': 640,
          'edition_num': '1',
          'publish_date': 'Jan 21, 2020',
          'product_group': 'Book',
          'physical_format': 'hardcover',
        }

        """
        if not product:
            return {}  # no match?

        # getattr needs an explicit default to tolerate a missing attribute;
        # without one it behaves exactly like plain attribute access
        item_info = getattr(product, 'item_info', None)
        images = getattr(product, 'images', None)
        offers = getattr(product, 'offers', None)
        edition_info = item_info and getattr(item_info, 'content_info', None)
        attribution = item_info and getattr(item_info, 'by_line_info', None)
        classifications = item_info and getattr(
            item_info, 'classifications', None)
        title = item_info and getattr(item_info, 'title', None)

        price = offers and offers.listings and offers.listings[0].price
        brand = (attribution and getattr(attribution, 'brand', None)
                 and getattr(attribution.brand, 'display_value', None))
        manufacturer = (
            attribution and getattr(attribution, 'manufacturer', None)
            and getattr(attribution.manufacturer, 'display_value', None))
        product_group = (
            classifications
            and getattr(classifications, 'product_group', None)
            and getattr(classifications.product_group, 'display_value', None))
        binding = classifications and getattr(classifications, 'binding', None)
        contributors = (
            attribution and getattr(attribution, 'contributors', None)) or []

        # only try to parse a date that is actually there, so a missing
        # date is not logged as an error
        publication = edition_info and getattr(
            edition_info, 'publication_date', None)
        raw_date = publication and getattr(publication, 'display_value', None)
        publish_date = None
        if raw_date:
            try:
                publish_date = isoparser.parse(raw_date).strftime('%b %d, %Y')
            except Exception:
                logger.exception("serialize({})".format(product))

        book = {
            'url':
            "https://www.amazon.com/dp/%s/?tag=%s" %
            (product.asin, h.affiliate_id('amazon')),
            'source_records': ['amazon:%s' % product.asin],
            # NOTE(review): treats the ASIN as an ISBN-10 -- confirm that
            # only book ASINs reach this point
            'isbn_10': [product.asin],
            'isbn_13': [isbn_10_to_isbn_13(product.asin)],
            'price':
            price and price.display_amount,
            # round before truncating: 100 * 19.99 is 1998.9999999999998
            'price_amt':
            price and price.amount and int(round(100 * price.amount)),
            'title': title and getattr(title, 'display_value', None),
            'cover': (images and images.primary and images.primary.large
                      and images.primary.large.url),
            'authors':
            attribution and [{
                'name': contrib.name
            } for contrib in contributors],
            'publishers':
            list(set(p for p in (brand, manufacturer) if p)),
            'number_of_pages': (edition_info and edition_info.pages_count
                                and edition_info.pages_count.display_value),
            'edition_num': (edition_info and edition_info.edition
                            and edition_info.edition.display_value),
            'publish_date':
            publish_date,
            'product_group':
            product_group,
            'physical_format': (
                classifications
                and (getattr(binding, 'display_value', None) or '').lower()),
        }
        return book
Exemplo n.º 9
0
def _serialize_amazon_product(product):
    """Extract the fields we care about from one Amazon product.

    The product is a full Amazon Product Advertising API AmazonProduct
    carrying multiple ResponseGroups.

    :param amazon.api.AmazonProduct product:
    :return: Amazon metadata for one product
    :rtype: dict
    """
    used = product._safe_get_element_text(
        'OfferSummary.LowestUsedPrice.Amount')
    new = product._safe_get_element_text('OfferSummary.LowestNewPrice.Amount')

    # Used wins only when strictly cheaper than new (or when there is no
    # new offer); otherwise fall back to new, if there is one.
    if used and (not new or int(used) < int(new)):
        price, qlt = used, 'used'
    elif new:
        price, qlt = new, 'new'
    else:
        price = qlt = None

    price_fmt = None
    if price:
        price = '{:00,.2f}'.format(int(price) / 100.)
        if qlt:
            price_fmt = "$%s (%s)" % (price, qlt)

    data = {
        'url': "https://www.amazon.com/dp/%s/?tag=%s" % (
            product.asin, h.affiliate_id('amazon')),
        'price': price_fmt,
        'price_amt': price,
        'qlt': qlt,
        'title': product.title,
        'authors': [{'name': name} for name in product.authors],
        'source_records': ['amazon:%s' % product.asin],
        'number_of_pages': product.pages,
        'languages': list(product.languages),
        'cover': product.large_image_url,
        'product_group': product.product_group,
    }

    if product._safe_get_element('OfferSummary') is not None:
        # offer counts, always present in the summary
        summary = {}
        for key, path in (
                ('total_new', 'OfferSummary.TotalNew'),
                ('total_used', 'OfferSummary.TotalUsed'),
                ('total_collectible', 'OfferSummary.TotalCollectible')):
            summary[key] = int(product._safe_get_element_text(path))

        # lowest prices, added only when a value was found
        collectible = product._safe_get_element_text(
            'OfferSummary.LowestCollectiblePrice.Amount')
        for key, amount in (
                ('lowest_new', new),
                ('lowest_used', used),
                ('lowest_collectible', collectible)):
            if amount:
                summary[key] = int(amount)

        amazon_offers = product._safe_get_element_text('Offers.TotalOffers')
        if amazon_offers:
            summary['amazon_offers'] = int(amazon_offers)
        data['offer_summary'] = summary

    if product.publication_date:
        raw_date = product._safe_get_element_text(
            'ItemAttributes.PublicationDate')
        # reformat only when the raw text matches AMAZON_FULL_DATE_RE;
        # otherwise keep the text as given
        if re.match(AMAZON_FULL_DATE_RE, raw_date):
            data['publish_date'] = product.publication_date.strftime(
                '%b %d, %Y')
        else:
            data['publish_date'] = raw_date

    binding = product.binding
    if binding:
        data['physical_format'] = binding.lower()
    edition = product.edition
    if edition:
        data['edition'] = edition
    publisher = product.publisher
    if publisher:
        data['publishers'] = [publisher]

    isbn = product.isbn
    if isbn:
        size = len(isbn)
        if size == 10:
            data['isbn_10'] = [isbn]
            data['isbn_13'] = [isbn_10_to_isbn_13(isbn)]
        elif size == 13:
            data['isbn_13'] = [isbn]
            if isbn.startswith('978'):
                data['isbn_10'] = [isbn_13_to_isbn_10(isbn)]
    return data
Exemplo n.º 10
0
def _get_amazon_metadata(id_=None, id_type='isbn'):
    """Look one product up through the Amazon API client and describe it.

    With ``id_type == 'isbn'`` the id is normalized and searched as an ISBN
    in the Books index; any other ``id_type`` passes ``id_`` through as the
    plain item id.

    :param id_: identifier to look up
    :param str id_type: kind of identifier, ``'isbn'`` by default
    :return: metadata dict, or None when no API client is configured or
        the lookup raises
    :rtype: dict or None
    """
    # TODO: extend this to work with
    # isbn=, asin=, title=, authors=, etc
    lookup_args = {}
    if id_type == 'isbn':
        id_ = normalize_isbn(id_)
        lookup_args.update(SearchIndex='Books', IdType='ISBN')
    lookup_args['ItemId'] = id_

    try:
        api = lending.amazon_api
        if not api:
            return None
        product = api.lookup(**lookup_args)
        # sometimes more than one product can be returned, choose first
        if isinstance(product, list):
            product = product[0]
    except Exception:
        return None

    used = product._safe_get_element_text(
        'OfferSummary.LowestUsedPrice.Amount')
    new = product._safe_get_element_text('OfferSummary.LowestNewPrice.Amount')

    # Used wins only when strictly cheaper than new (or when there is no
    # new offer); otherwise fall back to new, if there is one.
    if used and (not new or int(used) < int(new)):
        price, qlt = used, 'used'
    elif new:
        price, qlt = new, 'new'
    else:
        price = qlt = None

    price_fmt = None
    if price:
        price = '{:00,.2f}'.format(int(price) / 100.)
        if qlt:
            price_fmt = "$%s (%s)" % (price, qlt)

    data = {
        'url': "https://www.amazon.com/dp/%s/?tag=%s" % (
            id_, h.affiliate_id('amazon')),
        'price': price_fmt,
        'price_amt': price,
        'qlt': qlt,
        'title': product.title,
        'authors': [{'name': name} for name in product.authors],
        'source_records': ['amazon:%s' % product.asin],
        'number_of_pages': product.pages,
        'languages': list(product.languages),  # needs to be normalized
        'cover': product.large_image_url,
        'product_group': product.product_group,
    }

    published = product.publication_date
    if published:
        # TODO: Don't populate false month and day for older products
        data['publish_date'] = published.strftime('%b %d, %Y')
    binding = product.binding
    if binding:
        data['physical_format'] = binding.lower()
    edition = product.edition
    if edition:
        data['edition'] = edition
    publisher = product.publisher
    if publisher:
        data['publishers'] = [publisher]

    isbn = product.isbn
    if isbn:
        size = len(isbn)
        if size == 10:
            data['isbn_10'] = [isbn]
            data['isbn_13'] = [isbn_10_to_isbn_13(isbn)]
        elif size == 13:
            data['isbn_13'] = [isbn]
            if isbn.startswith('978'):
                data['isbn_10'] = [isbn_13_to_isbn_10(isbn)]
    return data
Exemplo n.º 11
0
def test_isbn_10_to_isbn_13():
    """Hyphenated and plain ISBN-10s convert alike; bad input gives None."""
    expected = '9780940787087'
    for raw in ('0-940787-08-3', '0940787083'):
        assert isbn_10_to_isbn_13(raw) == expected
    assert isbn_10_to_isbn_13('BAD-ISBN') is None
Exemplo n.º 12
0
    def serialize(product):
        """Takes a full Amazon product Advertising API returned AmazonProduct
        with multiple ResponseGroups, and extracts the data we are
        interested in.

        Sections of the product that are absent are reported as falsy
        values (or an empty author list) instead of raising.

        :param AmazonAPI product:
        :return: Amazon metadata for one product, ``{}`` for a falsy product
        :rtype: dict

        {
          'price': '$54.06',
          'price_amt': 5406,
          'physical_format': 'Hardcover',
          'authors': [{'role': 'Author', 'name': 'Guterson, David'}],
          'publish_date': 'Jan 21, 2020',
          'dimensions': {
            'width': [1.7, 'Inches'],
            'length': [8.5, 'Inches'],
            'weight': [5.4, 'Pounds'],
            'height': [10.875, 'Inches']
          },
          'publishers': ['Victory Belt Publishing'],
          'source_records': ['amazon:1628603976'],
          'title': 'Boundless: Upgrade Your Brain, Optimize Your Body & Defy Aging',
          'url': 'https://www.amazon.com/dp/1628603976/?tag=internetarchi-20',
          'number_of_pages': 640,
          'cover': 'https://m.media-amazon.com/images/I/51IT9MV3KqL._AC_.jpg',
          'languages': ['English'],
          'edition_num': '1'
        }

        """
        if not product:
            return {}  # no match?

        # Each section may be missing from a sparse product, so every hop
        # is guarded before it is dereferenced.
        item_info = product.item_info
        edition_info = item_info and item_info.content_info
        attribution = item_info and item_info.by_line_info
        classifications = item_info and item_info.classifications
        product_info = item_info and item_info.product_info
        offers = product.offers
        images = product.images

        price = offers and offers.listings and offers.listings[0].price
        dims = product_info and product_info.item_dimensions
        binding = classifications and classifications.binding
        contributors = (attribution and attribution.contributors) or []
        languages = edition_info and edition_info.languages

        try:
            publish_date = isoparser.parse(
                edition_info.publication_date.display_value).strftime(
                    '%b %d, %Y')
        except Exception:
            # missing or unparseable date: report none
            publish_date = None

        book = {
            'url':
            "https://www.amazon.com/dp/%s/?tag=%s" %
            (product.asin, h.affiliate_id('amazon')),
            'source_records': ['amazon:%s' % product.asin],
            # NOTE(review): treats the ASIN as an ISBN-10 -- confirm that
            # only book ASINs reach this point
            'isbn_10': [product.asin],
            'isbn_13': [isbn_10_to_isbn_13(product.asin)],
            'price':
            price and price.display_amount,
            # round before truncating: 100 * 19.99 is 1998.9999999999998
            'price_amt':
            price and price.amount and int(round(100 * price.amount)),
            'title':
            item_info and item_info.title and item_info.title.display_value,
            'cover': (images and images.primary and images.primary.large
                      and images.primary.large.url),
            'authors': [{
                'name': contrib.name,
                'role': contrib.role
            } for contrib in contributors],
            'publishers':
            (attribution and attribution.brand
             and [attribution.brand.display_value]),
            'number_of_pages': (edition_info and edition_info.pages_count
                                and edition_info.pages_count.display_value),
            'edition_num': (edition_info and edition_info.edition
                            and edition_info.edition.display_value),
            'publish_date':
            publish_date,
            'languages': (languages and list(
                {lang.display_value
                 for lang in (languages.display_values or [])})),
            'physical_format':
            binding and getattr(binding, 'display_value', None),
            'dimensions':
            dims and {
                d: [getattr(dims, d).display_value,
                    getattr(dims, d).unit]
                for d in dims.to_dict() if getattr(dims, d)
            }
        }
        return book