def _parse_tooltip_info(self, book_data, book_id, image_url):
    '''Parse information retrieved from a goodreads tooltips link.

    Args:
        book_data: parsed tooltip markup. Only its ``xpath`` method is used,
            and the returned nodes are read through ``.text`` and
            ``.text_content()`` (presumably an lxml tree - confirm against
            the caller).
        book_id: goodreads book id as a string; appended to
            ``/buttons/glide/`` when the ASIN is missing from the tooltip.
        image_url: cover image url, copied into the result unchanged.

    Returns:
        A dict describing the book, or None when no ASIN or no description
        could be found. Title, authors, rating and review count are None
        when they are missing from the tooltip or cannot be parsed.

    Exceptions raised by ``open_url`` in the ASIN fallback are not caught.
    '''
    title_links = book_data.xpath('//a[contains(@class, "readable")]')
    title = title_links[0].text if title_links else None

    # Only the first author link is kept, wrapped in a list.
    author_links = book_data.xpath('//a[contains(@class, "authorName")]')
    authors = [author_links[0].text] if author_links else None

    rating = None
    num_of_reviews = None
    rating_info = book_data.xpath(
        '//div[@class="bookRatingAndPublishing"]/span[@class="minirating"]')
    if rating_info:
        # Thousands separators are dropped so the review count parses as int.
        words = rating_info[0].text_content().strip().replace(',', '').split()
        # The rating is the token right before "avg" and the review count is
        # the second-to-last token. Text that does not follow that layout
        # yields None instead of aborting the whole parse.
        try:
            rating = float(words[words.index('avg') - 1])
        except (ValueError, IndexError):
            rating = None
        try:
            num_of_reviews = int(words[-2])
        except (ValueError, IndexError):
            num_of_reviews = None

    try:
        asin_links = book_data.xpath(
            '//a[contains(@class, "kindlePreviewButtonIcon")]/@href')
        preview_query = urllib.parse.urlsplit(asin_links[0]).query
        book_asin = urllib.parse.parse_qs(preview_query)["asin"][0]
    except (KeyError, IndexError):
        book_asin = None

    # We should get the ASIN from the tooltips file, but just in case we'll
    # keep this as a fallback (though this only works in some regions - just USA?)
    if not book_asin:
        asin_data_page = open_url(self._connection, '/buttons/glide/' + book_id)
        asin_match = GOODREADS_ASIN_PAT.search(asin_data_page)
        if not asin_match:
            return None
        book_asin = asin_match.group(1)

    # Prefer the span that is not the "freeTextContainer"; use the container
    # span as a backup. A span whose ``text`` is None (no leading text node)
    # is skipped so that re.sub never receives None.
    desc = None
    for desc_query in (
            '//div[@class="addBookTipDescription"]//span[not(contains(@id, "freeTextContainer"))]',
            '//div[@class="addBookTipDescription"]//span[contains(@id, "freeTextContainer")]'):
        spans = book_data.xpath(desc_query)
        if spans and spans[0].text is not None:
            desc = re.sub(r'\s+', ' ', spans[0].text).strip()
            break
    if desc is None:
        return None

    return {
        'class': 'featuredRecommendation',
        'asin': book_asin,
        'title': title,
        'authors': authors,
        'imageUrl': image_url,
        'description': desc,
        'hasSample': False,
        'amazonRating': rating,
        'numberOfReviews': num_of_reviews,
    }
def _parse_tooltip_info(self, book_data, book_id, image_url):
    '''Parse information retrieved from a goodreads tooltips link.

    Args:
        book_data: parsed tooltip markup. Only its ``xpath`` method is used,
            and the returned nodes are read through ``.text`` and
            ``.text_content()`` (presumably an lxml tree - confirm against
            the caller).
        book_id: goodreads book id as a string; appended to
            ``/buttons/glide/`` when the ASIN is missing from the tooltip.
        image_url: cover image url, copied into the result unchanged.

    Returns:
        A dict describing the book, or None when no ASIN or no description
        could be found. Title, authors, rating and review count are None
        when they are missing from the tooltip or cannot be parsed.

    Exceptions raised by ``open_url`` in the ASIN fallback are not caught.
    '''
    title_nodes = book_data.xpath('//a[contains(@class, "readable")]')
    title = title_nodes[0].text if title_nodes else None

    # Only the first author link is kept, wrapped in a list.
    author_nodes = book_data.xpath('//a[contains(@class, "authorName")]')
    authors = [author_nodes[0].text] if author_nodes else None

    rating = None
    num_of_reviews = None
    rating_info = book_data.xpath('//div[@class="bookRatingAndPublishing"]/span[@class="minirating"]')
    if rating_info:
        # Thousands separators are dropped so the review count parses as int.
        tokens = rating_info[0].text_content().strip().replace(',', '').split()
        # The rating is the token right before "avg" and the review count is
        # the second-to-last token. Text that does not follow that layout
        # yields None instead of aborting the whole parse.
        try:
            rating = float(tokens[tokens.index('avg') - 1])
        except (ValueError, IndexError):
            rating = None
        try:
            num_of_reviews = int(tokens[-2])
        except (ValueError, IndexError):
            num_of_reviews = None

    # NOTE(review): `urlparse` must be whatever this file imports under that
    # name (the Python 2 module, or an alias of urllib.parse) - confirm.
    try:
        asin_elements = book_data.xpath('//a[contains(@class, "kindlePreviewButtonIcon")]/@href')
        preview_query = urlparse.urlsplit(asin_elements[0]).query
        book_asin = urlparse.parse_qs(preview_query)["asin"][0]
    except (KeyError, IndexError):
        book_asin = None

    # We should get the ASIN from the tooltips file, but just in case we'll
    # keep this as a fallback (though this only works in some regions - just USA?)
    if not book_asin:
        asin_data_page = open_url(self._connection, '/buttons/glide/' + book_id)
        asin_search = GOODREADS_ASIN_PAT.search(asin_data_page)
        if not asin_search:
            return None
        book_asin = asin_search.group(1)

    # Prefer the span that is not the "freeTextContainer"; use the container
    # span as a backup. A span whose ``text`` is None (no leading text node)
    # is skipped so that re.sub never receives None.
    desc = None
    desc_queries = (
        '//div[@class="addBookTipDescription"]//span[not(contains(@id, "freeTextContainer"))]',
        '//div[@class="addBookTipDescription"]//span[contains(@id, "freeTextContainer")]',
    )
    for desc_query in desc_queries:
        candidates = book_data.xpath(desc_query)
        if candidates and candidates[0].text is not None:
            desc = re.sub(r'\s+', ' ', candidates[0].text).strip()
            break
    if desc is None:
        return None

    return {'class': 'featuredRecommendation',
            'asin': book_asin,
            'title': title,
            'authors': authors,
            'imageUrl': image_url,
            'description': desc,
            'hasSample': False,
            'amazonRating': rating,
            'numberOfReviews': num_of_reviews}
def search_for_asin_on_goodreads(self, url):
    '''Look up the ASIN of the book found at the given url.

    The book id is taken from ``url`` with BOOK_ID_PAT, the matching
    ``/buttons/glide/<book id>`` page is fetched over the 'goodreads'
    connection, and the ASIN is read from that page with GOODREADS_ASIN_PAT.

    Returns the ASIN (group 1 of the match), or None when the url holds no
    book id, the page does not exist, or the page holds no ASIN.
    '''
    id_match = BOOK_ID_PAT.search(url)
    if not id_match:
        return None

    try:
        glide_page = open_url(self._connections['goodreads'],
                              '/buttons/glide/' + id_match.group(1))
    except PageDoesNotExist:
        # A missing page is treated the same as a missing ASIN.
        return None

    asin_match = GOODREADS_ASIN_PAT.search(glide_page)
    return asin_match.group(1) if asin_match else None