def get_page(domain, asin, locale):
    """Look up one Amazon item in the requested locale.

    Args:
        domain: Not used by this function; kept so the signature stays
            compatible with existing callers.
        asin: Item identifier passed to ``ecs.ItemLookup``.
        locale: Locale code passed to ``ecs.setLocale`` before the lookup.

    Returns:
        The value returned by ``ecs.ItemLookup`` for ``asin`` with the
        "Medium" response group.

    Raises:
        UserException: If the lookup raises ``ecs.InvalidParameterValue``;
            the message is the text of that error.
    """
    # The locale is module-level state in ecs, so it must be set before
    # the lookup is issued.
    ecs.setLocale(locale)
    try:
        # Hand the result back to the caller; previously it was bound to a
        # local that was never used, so the function always returned None.
        return ecs.ItemLookup(asin, ResponseGroup="Medium")
    except ecs.InvalidParameterValue as e:
        # "as" / call syntax is accepted by Python 2.6+ and Python 3 alike.
        raise UserException(str(e))
def fetch(page, asin, locale):
    """Yield (name, value) metadata pairs for an Amazon book lookup result.

    This is a generator: nothing below runs, including the "not a book"
    check, until the caller starts iterating.

    Args:
        page: Lookup result that ``extract`` can read fields from.
        asin: Amazon item identifier; emitted in the AZ-* linkouts and
            re-used to query the other locales.
        locale: Locale code that ``page`` belongs to (compared against the
            lower-case codes "us", "uk", "de", "jp", "fr", "ca").

    Yields:
        ("type", "BOOK") first, then whichever of the following are
        available: title/isbn/publisher/edition/how_published, year/month/
        day, one "author" pair per author, tab-separated "linkout" records
        (cover images, ISBN, one AZ-<LOCALE> entry per locale that knows
        the ASIN), at most one "abstract", and possibly a "status"
        redirect record.

    Raises:
        UserException: If the item's ProductGroup is not "Book".
        SystemExit: Via ``sys.exit(0)`` when the generator is resumed after
            yielding the redirect status (ISBN present but no title).

    Note:
        ``ecs.setLocale`` is called for each other locale and this function
        does not restore the previous locale afterwards.
    """
    amazon_type = extract(page, ["ProductGroup"])
    if amazon_type != "Book":
        raise UserException(
            "This item on Amazon does not appear to be a book. It looks like a %s"
            % amazon_type)
    yield ("type", "BOOK")

    # (path inside the Amazon response, name of the field we emit)
    field_map = [
        (['Title'], 'title'),
        (['ISBN'], 'isbn'),
        (['Publisher'], 'publisher'),
        (['Edition'], 'edition'),
        (['Binding'], 'how_published'),
    ]
    for (path, our_name) in field_map:
        val = extract(page, path)
        if val:
            yield (our_name, tidy(val))

    date = extract(page, ['PublicationDate'])
    if date:
        # Emit whichever leading components are present, so "YYYY" and
        # "YYYY-MM" still contribute a year (and month) instead of being
        # dropped entirely. Anything with more than three components is
        # treated as malformed and ignored, as before.
        parts = date.split("-")
        if len(parts) <= 3:
            for (name, value) in zip(("year", "month", "day"), parts):
                yield (name, value)

    authors = extract(page, ['Author'])
    if authors:
        # A single author comes back as one string, several as a sequence.
        if isinstance(authors, basestring):
            yield ("author", tidy(authors))
        else:
            for author in authors:
                yield ("author", tidy(author))

    # We put the images in as linkouts. CiteULike uses these
    # internally to show cover images.
    for (theirs, ours) in [('SmallImage', 'IMGS'),
                           ('MediumImage', 'IMGM'),
                           ('LargeImage', 'IMGL')]:
        img_url = extract(page, [theirs, "URL"])
        if img_url:
            yield ("linkout", "\t".join([ours, "", img_url, "", ""]))

    isbn = extract(page, ['ISBN'])
    if isbn:
        yield ("linkout", "\t".join(["ISBN", "", isbn, "", ""]))

    title = extract(page, ["Title"])
    if not title and isbn:
        # No title but an ISBN: hand the lookup over to WorldCat. The
        # process exits once the consumer asks for the next record.
        yield ("status", "redirect\thttp://www.worldcat.org/isbn/%s" % isbn)
        sys.exit(0)

    def get_abstract(n):
        """Return the tidied editorial review text of ``n``, or None."""
        abstract = extract(n, ['EditorialReviews', 'EditorialReview', 'Content'])
        if abstract:
            return tidy(html2text(abstract))

    # At most one abstract is emitted; the first locale that has one wins.
    abstract = get_abstract(page)
    seen_abstract = bool(abstract)
    if abstract:
        yield ("abstract", abstract)

    # Linkouts to the ASINS
    yield ("linkout", "\t".join(["AZ-%s" % locale.upper(), "", asin, "", ""]))

    # Different amazons may know this product by a different
    # ASIN. Trawl through all of them and see who has this one.
    for other_locale in ["us", "uk", "de", "jp", "fr", "ca"]:
        if other_locale == locale:
            continue
        ecs.setLocale(other_locale)
        try:
            other_page = ecs.ItemLookup(asin, ResponseGroup="Medium")
        except ecs.InvalidParameterValue:
            # This locale does not know the ASIN; no linkout for it.
            continue
        if not seen_abstract:
            abstract = get_abstract(other_page)
            if abstract:
                yield ("abstract", abstract)
                seen_abstract = True
        yield ("linkout",
               "\t".join(["AZ-%s" % other_locale.upper(), "", asin, "", ""]))
def testSetLocale(self):
    """ecs.getLocale() reports the locale most recently set."""
    locale = "fr"
    ecs.setLocale(locale)
    self.assertEqual(ecs.getLocale(), locale)
def testSetLocale(self):
    """Setting the locale to "fr" must be visible through getLocale()."""
    expected = "fr"
    ecs.setLocale(expected)
    actual = ecs.getLocale()
    self.assertEqual(actual, expected)
def _publication_date_fields(date):
    """Pair the dash-separated parts of ``date`` with year/month/day names."""
    pieces = date.split("-")
    if len(pieces) > 3:
        # More components than year-month-day: treat as malformed.
        return []
    return zip(("year", "month", "day"), pieces)


def fetch(page, asin, locale):
    """Generate the (name, value) records describing an Amazon book.

    Being a generator, the function body (including the product-group
    check) only executes once the caller begins iterating.

    Args:
        page: Lookup result readable through ``extract``.
        asin: Amazon item identifier, written into every AZ-* linkout and
            looked up again in each other locale.
        locale: Locale code of ``page``; the other locales tried are the
            remaining entries of "us", "uk", "de", "jp", "fr", "ca".

    Yields:
        ("type", "BOOK"), then the available bibliographic fields, date
        components, authors, tab-separated "linkout" records, at most one
        "abstract", and possibly a "status" redirect record.

    Raises:
        UserException: If ProductGroup is anything other than "Book".
        SystemExit: Raised by ``sys.exit(0)`` when iteration continues
            past the redirect status record.

    Note:
        The ecs locale is switched for every other locale queried and is
        not switched back by this function.
    """
    product_group = extract(page, ["ProductGroup"])
    if product_group != "Book":
        raise UserException(
            "This item on Amazon does not appear to be a book. It looks like a %s"
            % product_group)
    yield ("type", "BOOK")

    # Simple one-to-one fields: Amazon path -> emitted field name.
    simple_fields = [
        (['Title'], 'title'),
        (['ISBN'], 'isbn'),
        (['Publisher'], 'publisher'),
        (['Edition'], 'edition'),
        (['Binding'], 'how_published'),
    ]
    for amazon_path, field_name in simple_fields:
        value = extract(page, amazon_path)
        if value:
            yield (field_name, tidy(value))

    # Partial dates ("YYYY", "YYYY-MM") yield the components they do have
    # rather than being discarded because three parts were expected.
    published = extract(page, ['PublicationDate'])
    if published:
        for date_field in _publication_date_fields(published):
            yield date_field

    authors = extract(page, ['Author'])
    if authors:
        # Normalise the single-author case (a bare string) to a list.
        if isinstance(authors, basestring):
            authors = [authors]
        for author in authors:
            yield ("author", tidy(author))

    # We put the images in as linkouts. CiteULike uses these
    # internally to show cover images.
    image_sizes = [('SmallImage', 'IMGS'),
                   ('MediumImage', 'IMGM'),
                   ('LargeImage', 'IMGL')]
    for amazon_key, linkout_type in image_sizes:
        image_url = extract(page, [amazon_key, "URL"])
        if image_url:
            yield ("linkout", "\t".join([linkout_type, "", image_url, "", ""]))

    isbn = extract(page, ['ISBN'])
    if isbn:
        yield ("linkout", "\t".join(["ISBN", "", isbn, "", ""]))

    if isbn and not extract(page, ["Title"]):
        # Title missing but ISBN known: redirect to WorldCat, then stop
        # the process when the consumer resumes the generator.
        yield ("status", "redirect\thttp://www.worldcat.org/isbn/%s" % isbn)
        sys.exit(0)

    def get_abstract(n):
        """Return the tidied editorial review of ``n`` if it has one."""
        review = extract(n, ['EditorialReviews', 'EditorialReview', 'Content'])
        if review:
            return tidy(html2text(review))

    # Only the first abstract found (own locale first) is emitted.
    abstract = get_abstract(page)
    need_abstract = not abstract
    if abstract:
        yield ("abstract", abstract)

    # Linkouts to the ASINS
    yield ("linkout", "\t".join(["AZ-%s" % locale.upper(), "", asin, "", ""]))

    # Different amazons may know this product by a different
    # ASIN. Trawl through all of them and see who has this one.
    for other_locale in ["us", "uk", "de", "jp", "fr", "ca"]:
        if other_locale == locale:
            continue
        ecs.setLocale(other_locale)
        try:
            foreign_page = ecs.ItemLookup(asin, ResponseGroup="Medium")
        except ecs.InvalidParameterValue:
            # ASIN unknown in this locale: skip its linkout.
            continue
        if need_abstract:
            abstract = get_abstract(foreign_page)
            if abstract:
                yield ("abstract", abstract)
                need_abstract = False
        yield ("linkout",
               "\t".join(["AZ-%s" % other_locale.upper(), "", asin, "", ""]))