示例#1
0
    def serialize(product):
        """Extract the fields we are interested in from one Amazon product.

        Takes an item returned by the Amazon Product Advertising API and
        flattens it into a plain dict.  Nested attributes of ``product`` are
        treated as optional: each lookup short-circuits on a missing or falsy
        value instead of raising, so absent data comes back as a falsy value
        (usually ``None``) under the corresponding key.

        :param AmazonAPI product: item as returned by the API; a falsy value
            means "no match"
        :return: Amazon metadata for one product, ``{}`` if there is none
        :rtype: dict

        Illustrative result::

            {
              'url': 'https://www.amazon.com/dp/1628603976/?tag=internetarchi-20',
              'source_records': ['amazon:1628603976'],
              'isbn_10': ['1628603976'],
              'isbn_13': ['9781628603972'],
              'price': '$54.06',
              'price_amt': 5406,
              'title': 'Boundless: Upgrade Your Brain, Optimize Your Body & Defy Aging',
              'cover': 'https://m.media-amazon.com/images/I/51IT9MV3KqL._AC_.jpg',
              'authors': [{'name': 'Guterson, David'}],
              'publishers': ['Victory Belt Publishing'],
              'number_of_pages': 640,
              'edition_num': '1',
              'publish_date': 'Jan 21, 2020',
              'product_group': 'Book',
              'physical_format': 'hardcover',
            }

        """
        if not product:
            return {}  # no match?

        # Every getattr carries a default: without one it raises exactly like
        # plain attribute access and gives no protection at all.
        item_info = getattr(product, 'item_info', None)
        images = getattr(product, 'images', None)
        offers = getattr(product, 'offers', None)
        edition_info = item_info and getattr(item_info, 'content_info', None)
        attribution = item_info and getattr(item_info, 'by_line_info', None)
        classifications = item_info and getattr(
            item_info, 'classifications', None)

        # Only the first listing is consulted for the price.
        price = offers and offers.listings and offers.listings[0].price
        brand = (attribution and getattr(attribution, 'brand', None)
                 and getattr(attribution.brand, 'display_value', None))
        manufacturer = (attribution
                        and getattr(attribution, 'manufacturer', None)
                        and attribution.manufacturer.display_value)
        product_group = (classifications
                         and getattr(classifications, 'product_group', None)
                         and classifications.product_group.display_value)
        binding = classifications and getattr(classifications, 'binding', None)

        try:
            # A missing publication date is an ordinary case, so it is
            # checked up front; only a genuinely unparsable value is logged.
            publish_date = (
                edition_info
                and edition_info.publication_date
                and isoparser.parse(
                    edition_info.publication_date.display_value
                ).strftime('%b %d, %Y'))
        except Exception:
            logger.exception("serialize(%s)", product)
            publish_date = None

        # Brand and manufacturer frequently name the same publisher; keep one
        # copy of each distinct name, in a stable (brand-first) order.
        publishers = []
        for publisher in (brand, manufacturer):
            if publisher and publisher not in publishers:
                publishers.append(publisher)

        book = {
            'url': "https://www.amazon.com/dp/%s/?tag=%s" % (
                product.asin, h.affiliate_id('amazon')),
            'source_records': ['amazon:%s' % product.asin],
            # NOTE(review): the ASIN is used verbatim as the ISBN-10, which
            # presumes the product is a book -- confirm against callers.
            'isbn_10': [product.asin],
            'isbn_13': [isbn_10_to_isbn_13(product.asin)],
            'price': price and price.display_amount,
            # Round before truncating: 100 * 19.99 is 1998.9999999999998 in
            # binary floating point, which a bare int() would turn into 1998.
            'price_amt': (price and price.amount
                          and int(round(100 * price.amount))),
            'title': (item_info and item_info.title
                      and getattr(item_info.title, 'display_value', None)),
            'cover': (images and images.primary and images.primary.large
                      and images.primary.large.url),
            # `contributors` may be None even when by-line info is present.
            'authors': attribution and [
                {'name': contrib.name}
                for contrib in attribution.contributors or []
            ],
            'publishers': publishers,
            'number_of_pages': (edition_info and edition_info.pages_count
                                and edition_info.pages_count.display_value),
            'edition_num': (edition_info and edition_info.edition
                            and edition_info.edition.display_value),
            'publish_date': publish_date,
            'product_group': product_group,
            # A missing binding, or one whose display value is None, both
            # normalise to the empty string.
            'physical_format': (
                classifications
                and (getattr(binding, 'display_value', None) or '').lower()),
        }
        return book
示例#2
0
from openlibrary.core import cache, helpers as h
from openlibrary.utils import dateutil
from openlibrary.utils.isbn import (normalize_isbn, isbn_13_to_isbn_10,
                                    isbn_10_to_isbn_13)
from openlibrary.catalog.add_book import load
from openlibrary import accounts

# Logger for this module, registered under the name "openlibrary.vendors".
logger = logging.getLogger("openlibrary.vendors")

BETTERWORLDBOOKS_BASE_URL = 'https://betterworldbooks.com'
# Query URL prefix: it ends in `ItemId=`, so the item identifier is appended
# directly to it (presumably an ISBN -- confirm against callers).
BETTERWORLDBOOKS_API_URL = (
    'https://products.betterworldbooks.com/service.aspx?'
    'IncludeAmazon=True&ItemId=')
# Stays None until setup() stores the configured 'affiliate_server' value.
affiliate_server_url = None
# `.format()` fills in the affiliate id once, at import time; the trailing
# `%s` is left in place as a placeholder for a later %-substitution.
BWB_AFFILIATE_LINK = 'http://www.anrdoezrs.net/links/{}/type/dlg/http://www.betterworldbooks.com/-id-%s'.format(
    h.affiliate_id('betterworldbooks'))
# A date spelled as YYYY-MM-DD: four digits, two digits, two digits.
AMAZON_FULL_DATE_RE = re.compile(r'\d{4}-\d\d-\d\d')
ISBD_UNIT_PUNCT = ' : '  # ISBD cataloging title-unit separator punctuation


def setup(config):
    global affiliate_server_url
    affiliate_server_url = config.get('affiliate_server')


class AmazonAPI:
    """Amazon Product Advertising API 5.0 wrapper for Python"""
    RESOURCES = {
        'all': [
            getattr(GetItemsResource, v) for v in
            # Hack: pulls all resource consts from GetItemsResource
示例#3
0
from openlibrary.utils import dateutil
from openlibrary.utils.isbn import (
    normalize_isbn, isbn_13_to_isbn_10, isbn_10_to_isbn_13)
from openlibrary.catalog.add_book import load
from openlibrary import accounts


# Logger for this module, registered under the name "openlibrary.vendors".
logger = logging.getLogger("openlibrary.vendors")

# Both are (re)assigned by setup(); they stay None until it has run.
amazon_api = None
config_amz_api = None

BETTERWORLDBOOKS_BASE_URL = 'https://betterworldbooks.com'
# Query URL prefix: it ends in `ItemId=`, so the item identifier is appended
# directly to it (presumably an ISBN -- confirm against callers).
BETTERWORLDBOOKS_API_URL = ('https://products.betterworldbooks.com/service.aspx?'
                            'IncludeAmazon=True&ItemId=')
# `.format()` fills in the affiliate id once, at import time; the trailing
# `%s` is left in place as a placeholder for a later %-substitution.
BWB_AFFILIATE_LINK = 'http://www.anrdoezrs.net/links/{}/type/dlg/http://www.betterworldbooks.com/-id-%s'.format(h.affiliate_id('betterworldbooks'))
# A date spelled as YYYY-MM-DD: four digits, two digits, two digits.
AMAZON_FULL_DATE_RE = re.compile(r'\d{4}-\d\d-\d\d')
ISBD_UNIT_PUNCT = ' : '  # ISBD cataloging title-unit separator punctuation


def setup(config):
    """Build the module-level Amazon API client from the site configuration.

    Stores the ``'amazon_api'`` config entry in ``config_amz_api`` and tries
    to create an ``AmazonAPI`` client from its ``key``, ``secret`` and ``id``
    attributes.  If that raises ``AttributeError`` (for instance because the
    entry is absent), ``amazon_api`` is set to ``None`` instead.

    :param config: object exposing ``get``
    """
    global config_amz_api, amazon_api
    config_amz_api = config.get('amazon_api')
    try:
        credentials = (
            config_amz_api.key,
            config_amz_api.secret,
            config_amz_api.id,
        )
        amazon_api = AmazonAPI(*credentials, throttling=0.9)
    except AttributeError:
        amazon_api = None

示例#4
0
def _serialize_amazon_product(product):
    """Flatten one Amazon product into the dict of fields we care about.

    The advertised price is the lower of the lowest used and lowest new
    offer; on a tie, or when only one of them exists, "new" wins over
    "used" only if no cheaper used offer is available.

    :param amazon.api.AmazonProduct product: product returned by the Amazon
        Product Advertising API with multiple ResponseGroups
    :return: Amazon metadata for one product
    :rtype: dict
    """
    text_of = product._safe_get_element_text

    lowest_used = text_of('OfferSummary.LowestUsedPrice.Amount')
    lowest_new = text_of('OfferSummary.LowestNewPrice.Amount')

    # Used is chosen only when it is strictly cheaper than new, or when
    # there is no new offer at all.
    if lowest_used and (not lowest_new or int(lowest_used) < int(lowest_new)):
        amount, condition = lowest_used, 'used'
    elif lowest_new:
        amount, condition = lowest_new, 'new'
    else:
        amount = condition = None

    price_amt = price_label = None
    if amount:
        # The raw amount is expressed in hundredths of the currency unit.
        price_amt = '{:00,.2f}'.format(int(amount) / 100.)
        price_label = "$%s (%s)" % (price_amt, condition)

    data = {
        'url': "https://www.amazon.com/dp/%s/?tag=%s" % (
            product.asin, h.affiliate_id('amazon')),
        'price': price_label,
        'price_amt': price_amt,
        'qlt': condition,
        'title': product.title,
        'authors': [{'name': name} for name in product.authors],
        'source_records': ['amazon:%s' % product.asin],
        'number_of_pages': product.pages,
        'languages': list(product.languages),
        'cover': product.large_image_url,
        'product_group': product.product_group,
    }

    if product._safe_get_element('OfferSummary') is not None:
        summary = {
            'total_new': int(text_of('OfferSummary.TotalNew')),
            'total_used': int(text_of('OfferSummary.TotalUsed')),
            'total_collectible': int(
                text_of('OfferSummary.TotalCollectible')),
        }
        data['offer_summary'] = summary
        lowest_collectible = text_of(
            'OfferSummary.LowestCollectiblePrice.Amount')
        # The lowest-price entries are only recorded when present.
        lowest_prices = (
            ('lowest_new', lowest_new),
            ('lowest_used', lowest_used),
            ('lowest_collectible', lowest_collectible),
        )
        for key, raw in lowest_prices:
            if raw:
                summary[key] = int(raw)
        total_offers = text_of('Offers.TotalOffers')
        if total_offers:
            summary['amazon_offers'] = int(total_offers)

    published = product.publication_date
    if published:
        # Keep the raw text unless it is a full YYYY-MM-DD date, in which
        # case it is reformatted like "Jan 21, 2020".
        raw_date = text_of('ItemAttributes.PublicationDate')
        if AMAZON_FULL_DATE_RE.match(raw_date):
            data['publish_date'] = published.strftime('%b %d, %Y')
        else:
            data['publish_date'] = raw_date

    binding = product.binding
    if binding:
        data['physical_format'] = binding.lower()

    edition = product.edition
    if edition:
        data['edition'] = edition

    publisher = product.publisher
    if publisher:
        data['publishers'] = [publisher]

    isbn = product.isbn
    if isbn:
        digits = len(isbn)
        if digits == 10:
            data['isbn_10'] = [isbn]
            data['isbn_13'] = [isbn_10_to_isbn_13(isbn)]
        elif digits == 13:
            data['isbn_13'] = [isbn]
            # Only the 978 prefix is converted back to an ISBN-10 here.
            if isbn.startswith('978'):
                data['isbn_10'] = [isbn_13_to_isbn_10(isbn)]
    return data
示例#5
0
    def serialize(product):
        """Extract the fields we are interested in from one Amazon product.

        Takes an item returned by the Amazon Product Advertising API and
        flattens it into a plain dict.  Nested attributes below ``product``
        are treated as optional: each lookup short-circuits on a falsy value
        instead of raising, so absent data comes back as a falsy value
        (usually ``None``) under the corresponding key.

        :param AmazonAPI product: item as returned by the API; a falsy value
            means "no match"
        :return: Amazon metadata for one product, ``{}`` if there is none
        :rtype: dict

        Illustrative result::

            {
              'url': 'https://www.amazon.com/dp/1628603976/?tag=internetarchi-20',
              'source_records': ['amazon:1628603976'],
              'isbn_10': ['1628603976'],
              'isbn_13': ['9781628603972'],
              'price': '$54.06',
              'price_amt': 5406,
              'title': 'Boundless: Upgrade Your Brain, Optimize Your Body & Defy Aging',
              'cover': 'https://m.media-amazon.com/images/I/51IT9MV3KqL._AC_.jpg',
              'authors': [{'role': 'Author', 'name': 'Guterson, David'}],
              'publishers': ['Victory Belt Publishing'],
              'number_of_pages': 640,
              'edition_num': '1',
              'publish_date': 'Jan 21, 2020',
              'languages': ['English'],
              'physical_format': 'Hardcover',
              'dimensions': {
                'width': [1.7, 'Inches'],
                'length': [8.5, 'Inches'],
                'weight': [5.4, 'Pounds'],
                'height': [10.875, 'Inches']
              },
            }

        """
        if not product:
            return {}  # no match?

        # Any of these sub-objects can be None; guard each step so one
        # missing section does not abort the whole serialization.
        item_info = product.item_info
        edition_info = item_info and item_info.content_info
        attribution = item_info and item_info.by_line_info
        classifications = item_info and item_info.classifications
        product_info = item_info and item_info.product_info
        offers = product.offers
        images = product.images

        # Only the first listing is consulted for the price.
        price = offers and offers.listings and offers.listings[0].price
        dims = product_info and product_info.item_dimensions
        brand = attribution and attribution.brand
        binding = classifications and classifications.binding
        contributors = (attribution and attribution.contributors) or []
        languages = edition_info and edition_info.languages

        try:
            # A missing publication date is an ordinary case and is checked
            # up front; the handler only covers unparsable values.
            publish_date = (
                edition_info
                and edition_info.publication_date
                and isoparser.parse(
                    edition_info.publication_date.display_value
                ).strftime('%b %d, %Y'))
        except Exception:
            # Deliberately best-effort: a bad date must not lose the record.
            publish_date = None

        book = {
            'url': "https://www.amazon.com/dp/%s/?tag=%s" % (
                product.asin, h.affiliate_id('amazon')),
            'source_records': ['amazon:%s' % product.asin],
            # NOTE(review): the ASIN is used verbatim as the ISBN-10, which
            # presumes the product is a book -- confirm against callers.
            'isbn_10': [product.asin],
            'isbn_13': [isbn_10_to_isbn_13(product.asin)],
            'price': price and price.display_amount,
            # Round before truncating: 100 * 19.99 is 1998.9999999999998 in
            # binary floating point, which a bare int() would turn into 1998.
            'price_amt': (price and price.amount
                          and int(round(100 * price.amount))),
            'title': (item_info and item_info.title
                      and item_info.title.display_value),
            'cover': (images and images.primary and images.primary.large
                      and images.primary.large.url),
            'authors': [
                {'name': contrib.name, 'role': contrib.role}
                for contrib in contributors
            ],
            'publishers': brand and [brand.display_value],
            'number_of_pages': (edition_info and edition_info.pages_count
                                and edition_info.pages_count.display_value),
            'edition_num': (edition_info and edition_info.edition
                            and edition_info.edition.display_value),
            'publish_date': publish_date,
            # De-duplicated; the resulting order is not significant.
            'languages': languages and list({
                lang.display_value
                for lang in languages.display_values or []
            }),
            'physical_format': binding and binding.display_value,
            # One [value, unit] pair per dimension that is actually set.
            'dimensions': dims and {
                d: [getattr(dims, d).display_value, getattr(dims, d).unit]
                for d in dims.to_dict() if getattr(dims, d)
            },
        }
        return book