def _merge_name_suffixes(names):
    """Fold short fragments such as 'Jr.' back into the preceding name.

    Any entry shorter than five characters is assumed to be a suffix or
    abbreviation that was split off its owner by the ", " split, so it is
    re-joined to the entry before it with ", ".  The list is walked from
    right to left so that earlier merges do not shift the indices of
    fragments still to be processed; a short entry in first position has
    nothing to attach to and is left alone.
    """
    merged = list(names)
    for idx in range(len(merged) - 1, 0, -1):
        if len(merged[idx]) < 5:
            merged[idx - 1 : idx + 1] = [", ".join(merged[idx - 1 : idx + 1])]
    return merged


def _record_from_isbnlib(isbn, price):
    """Build a result record from ``isbnlib.meta``, or ``{}`` if it has nothing.

    Used as the last-resort source by both the ECS and the scraping paths.
    The lookup is best-effort: a failure is reported on stderr and treated
    as "not found" rather than propagated.
    """
    isbnlibbooks = []
    try:
        isbnlibbooks = isbnlib.meta(str(isbn))
    except Exception as exc:
        print("isbnlib lookup failed:", exc, file=sys.stderr)
    if not isbnlibbooks:
        return {}
    return {
        "title": isbnlibbooks["Title"],
        "authors": isbnlibbooks["Authors"],
        "authors_as_string": ",".join(isbnlibbooks["Authors"]),
        "categories_as_string": None,
        "list_price": price,
        "publisher": isbnlibbooks["Publisher"],
        "isbn": isbn,
        "orig_isbn": isbn,
        "large_url": None,
        "med_url": None,
        "small_url": None,
        "format": None,
        "kind": "books",
        "known_title": False,
        "special_orders": [],
    }


def _record_from_known_title(known_title, isbn, price):
    """Build a result record from a title row already in the local database.

    ``price`` overrides the stored list price unless it is 0, in which case
    the highest ``listprice`` among the title's books is used (0 if the
    title has no books).  Note that this record carries its special orders
    under the key ``special_order_pivots``, unlike the external-source
    records which use ``special_orders``.
    """
    authors = [x.authorName.format() for x in known_title.author]
    categories = [x.categoryName.format() for x in known_title.categorys]

    if price == 0:
        book_prices = [x.listprice for x in known_title.books]
        list_price = max(book_prices) if book_prices else 0
    else:
        list_price = price

    # Image URLs are not read from the database here; they are always blank.
    large_url = med_url = small_url = ""

    special_orders = [
        tso.id
        for tso in Title.selectBy(isbn=isbn).throughTo.specialorder_pivots.filter(
            TitleSpecialOrder.q.orderStatus == "ON ORDER"
        )
    ]
    return {
        "title": known_title.booktitle.format(),
        "authors": authors,
        "authors_as_string": ", ".join(authors),
        "categories_as_string": ", ".join(categories),
        "list_price": list_price,
        "publisher": known_title.publisher.format(),
        "isbn": isbn,
        "orig_isbn": known_title.origIsbn.format(),
        "large_url": large_url,
        "med_url": med_url,
        "small_url": small_url,
        "format": known_title.type.format(),
        "kind": known_title.kind.kindName,
        "known_title": known_title,
        "special_order_pivots": special_orders,
    }


def _record_from_amazon_ecs(isbn, price):
    """Look the ISBN up through the Amazon ECS path, falling back to isbnlib.

    Returns a result record, ``{}`` when nothing usable was found, or ``[]``
    when ``isbn`` is a 13-character code starting with
    ``internal_isbn_prefix`` (legacy return value, kept for callers).
    """
    sleep(1)  # so amazon doesn't get huffy
    ecs.setLicenseKey(amazon_license_key)
    ecs.setSecretAccessKey(amazon_secret_key)
    ecs.setAssociateTag(amazon_associate_tag)

    # idType is only used for the diagnostic message below.
    idType = ""
    if len(isbn) == 12:
        idType = "UPC"
    elif len(isbn) == 13:
        if isbn.startswith(internal_isbn_prefix):
            # Internal codes are never searched externally.
            return []
        elif isbn.startswith("978") or isbn.startswith("979"):
            idType = "ISBN"
        else:
            idType = "EAN"

    # Pre-bind so a swallowed search error leads to the fallback below
    # instead of an UnboundLocalError.
    amazonProds = None
    try:
        print("searching amazon for ", isbn, idType, file=sys.stderr)
        amazonProds = AmzSear(isbn)
        print(amazonProds, file=sys.stderr)
    except (ecs.InvalidParameterValue, HTTPError):
        traceback.print_exc()

    if not amazonProds:
        print("using isbnlib from ecs", file=sys.stderr)
        return _record_from_isbnlib(isbn, price)

    # Formats in order of preference; the first one that has a price wins.
    format_list = [
        "Paperback",
        "Mass Market Paperback",
        "Hardcover",
        "Perfect Paperback",
        "Pamphlet",
        "Plastic Comb",
        "Spiral-bound",
        "Print on Demand (Paperback)",
        "DVD",
        "Calendar",
        "Board book",
        "Audio Cassette",
        "Cards",
        "Audio CD",
        "Diary",
        "DVD-ROM",
        "Library Binding",
        "music",
        "Vinyl",
        "Health and Beauty",
        "Hardback",
    ]
    for product in amazonProds.values():
        price_dict = product["prices"]
        known_formats = [k for k in format_list if k in price_dict]
        if not known_formats:
            continue
        print(product, file=sys.stderr)

        # Strip only the leading "by "; a blanket replace() would also eat
        # "by " inside names (e.g. "Abby Smith").
        by_lines = [
            x[len("by "):] for x in product["subtext"] if x.startswith("by ")
        ]
        auth_list = []
        if by_lines:
            auth_list = [
                name.strip()
                for chunk in by_lines[0].split(" and ")
                for name in chunk.split(", ")
            ]
        auth_list = _merge_name_suffixes(auth_list)

        image_url = product["image_url"]
        return {
            "title": product["title"],
            "authors": auth_list,
            "authors_as_string": ",".join(auth_list),
            "categories_as_string": "",
            "list_price": max(price_dict.values()),
            "publisher": "",
            "isbn": isbn,
            "orig_isbn": isbn,
            "large_url": image_url,
            "med_url": image_url,
            "small_url": image_url,
            "format": known_formats[0],
            "kind": "books",
            "known_title": False,
            "special_orders": [],
        }

    # Products came back but none is priced in a format we recognise.
    return {}


def _bookfinder_list_price(isbn, headers):
    """Fetch a list price for ``isbn`` from bookfinder4u.

    Returns the matched price text (a string) when the page contains a
    "List price:" entry, otherwise 0.0.  Request or parse failures are
    printed and reported as 0.0.
    """
    bookfinderurl = "http://www.bookfinder4u.com/IsbnSearch.aspx?isbn='%s'&mode=direct"
    with requests.Session() as session:
        try:
            page_response = session.get(
                bookfinderurl % isbn, headers=headers, timeout=0.1
            )
            page_content = BeautifulSoup(page_response.content, "lxml")
        except Exception:
            traceback.print_exc()
            return 0.0
        try:
            matches = re.search(
                r"List\sprice:\s(\w{2,4})\s(\d+(.\d+)?)",
                page_content.text,
                re.I,
            )
        except Exception:
            traceback.print_exc()
            return 0.00
        return matches.groups()[1] if matches else 0.00


def _parse_product_details(detail_tables):
    """Extract ``(publisher, format)`` from the selected product-details nodes.

    ``detail_tables`` is the list returned by selecting
    ``#productDetailsTable``; its last element's text is scanned line by
    line.  A line ending in "pages" supplies the format (the text before
    the colon) and a line starting with "Publisher: " supplies the
    publisher.  Whatever was found is returned; missing fields are ''.
    Any parsing error is printed and yields ``('', '')``.
    """
    publisher = ""
    book_format = ""
    try:
        fields_found = 0
        for detail_line in detail_tables.pop().text.splitlines():
            if detail_line.endswith("pages"):
                book_format, _page_count = detail_line.split(":")
                fields_found += 1
            elif detail_line.startswith("Publisher: "):
                publisher = re.match(
                    r"Publisher: ([^;^(]*)\s?([^(]*)?\W(.*)\W", detail_line
                ).groups()[0]
                fields_found += 1
            if fields_found == 2:
                break
    except Exception:
        traceback.print_exc()
        publisher = ""
        book_format = ""
    return publisher, book_format


def _record_from_amazon_scrape(isbn, price):
    """Scrape the Amazon product page for ``isbn``, falling back to isbnlib.

    Only 13-character codes that isbnlib accepts as ISBN-13 and can convert
    to ISBN-10 are looked up; anything else yields ``{}``.  If the page is
    fetched but carries no product title the result is ``{}``; if fetching
    or assembling the record raises, the isbnlib fallback is used instead.
    """
    print("scraping amazon", file=sys.stderr)
    headers = {"User-Agent": random.choice(user_agents)}
    amazon_url_template = "http://www.amazon.com/dp/%s/"

    # Bind isbn10 up front so non-13-character input takes the "not found"
    # path instead of raising UnboundLocalError.
    isbn10 = None
    if len(isbn) == 13:
        if not isbnlib.is_isbn13(isbn):
            return {}
        isbn10 = isbnlib.to_isbn10(isbn)
    if not isbn10:
        return {}

    with requests.Session() as session:
        try:
            print("getting amazon")
            # NOTE(review): a 0.1 s timeout is very tight for a remote page
            # fetch - confirm this is intentional.
            page_response = session.get(
                amazon_url_template % isbn10, headers=headers, timeout=0.1
            )
            print("got response")
            page_content = BeautifulSoup(page_response.content, "lxml")
            print("got parsed content")

            try:
                booktitle = page_content.select("#productTitle").pop().text
            except Exception:
                traceback.print_exc()
                booktitle = ""

            # Links inside the author pop-over duplicate the visible author
            # links, so they are filtered out of the author list.
            popover_preload = [
                a.text
                for a in page_content.select(
                    ".author.notFaded .a-popover-preload a.a-link-normal"
                )
            ]
            author_name = [
                a.text
                for a in page_content.select(".author.notFaded a.a-link-normal")
                if a.text not in popover_preload
            ]

            try:
                listprice = page_content.select(".a-text-strike").pop().text
            except IndexError:
                print("using bookfinder4u")
                listprice = _bookfinder_list_price(isbn, headers)

            try:
                matches = re.findall(
                    r"(?<=imageGalleryData'\s:\s\[)\{.*?\}",
                    page_content.contents[1].text,
                )
                # SECURITY(review): eval() runs text taken from a remote web
                # page. Kept for behavioural compatibility; replace with a
                # data-only parser (json.loads / ast.literal_eval) once the
                # payload format has been confirmed.
                image_url_dict = eval(matches[0])
            except Exception:
                traceback.print_exc()
                image_url_dict = {"mainUrl": "", "thumbUrl": ""}

            category_items = [
                a.text for a in page_content.select(".zg_hrsr_ladder a")
            ]
            publisher, book_format = _parse_product_details(
                page_content.select("#productDetailsTable")
            )

            if booktitle:
                return {
                    "title": booktitle,
                    "authors": author_name,
                    "authors_as_string": ",".join(author_name),
                    "categories_as_string": ",".join(category_items),
                    "list_price": listprice,
                    "publisher": publisher,
                    "isbn": isbn,
                    "orig_isbn": isbn,
                    "large_url": image_url_dict["mainUrl"],
                    "med_url": image_url_dict["mainUrl"],
                    "small_url": image_url_dict["thumbUrl"],
                    "format": book_format,
                    "kind": "books",
                    "known_title": False,
                    "special_orders": [],
                }
        except Exception:
            traceback.print_exc()
            print("using isbnlib from scraper", file=sys.stderr)
            return _record_from_isbnlib(isbn, price)

    # The page was fetched and parsed but had no product title.
    return {}


def lookup_by_isbn(number, forceUpdate=False):
    """Look up bibliographic data for an ISBN/UPC/EAN code.

    ``number`` is handed to ``_process_isbn``, which yields the ``isbn``
    and ``price`` used for the lookup.  Sources are tried in this order:

    1. the local ``Title`` table (skipped when ``forceUpdate`` is true);
    2. Amazon - via ECS when ``use_amazon_ecs`` is set, otherwise by
       scraping the product page;
    3. ``isbnlib.meta`` as a fallback when Amazon yields nothing.

    Returns a dict with the keys ``title``, ``authors``,
    ``authors_as_string``, ``categories_as_string``, ``list_price``,
    ``publisher``, ``isbn``, ``orig_isbn``, ``large_url``, ``med_url``,
    ``small_url``, ``format``, ``kind``, ``known_title`` and either
    ``special_order_pivots`` (local database hit) or ``special_orders``
    (external sources).  ``known_title`` is the database row for a local
    hit and ``False`` otherwise.

    Returns ``{}`` when nothing is found or when ``isbn`` is empty or
    starts with "na", "n/a", "n a" or "none" (case-insensitive).  In ECS
    mode a 13-character code starting with ``internal_isbn_prefix`` returns
    ``[]`` instead; both values are falsy.
    """
    isbn, price = _process_isbn(number)
    print("Looking up isbn", isbn, "with price", price)

    # Blank or placeholder codes are not worth looking up anywhere.
    if len(isbn) == 0 or re.match(r"^n(\s|/){0,1}a|none", isbn, re.I):
        return {}

    # First we check our database.
    the_titles = list(Title.select(Title.q.isbn == isbn))
    if the_titles and not forceUpdate:
        return _record_from_known_title(the_titles[0], isbn, price)

    # We don't have it yet (or a refresh was requested): go external.
    if use_amazon_ecs:
        return _record_from_amazon_ecs(isbn, price)
    return _record_from_amazon_scrape(isbn, price)
finally: readline.set_startup_hook() should_quit = False while should_quit != True: isbn = raw_input('isbn or title >> ') if isbn.lower().strip() == 'quit' or isbn.lower().strip() == 'q': should_quit = True continue if re.match('^[0-9]{13}$|^[0-9]{18}$', isbn): try: isbn, price = isbn[0:13], float(isbn[13, -1]) except: isbn, price = isbn[0:13], 0.00 titles = Title.selectBy(isbn=isbn) book = None if list(titles): for t1 in titles: ourprice = rlinput("price >> ", prefill=price) try: float(ourprice) except: continue books = Book.selectBy(titleID=t1.id, ourprice=float(ourprice), status='STOCK') if list(books): ourprice = books[0].ourprice listprice = books[0].listprice book = books[0]