def get_book_data_from_site(self, url):
    """Scrape one book page and return its fields as a SiteBookData list.

    NOTE(review): the previous docstring described the target site as both
    "Google Books" and "Audiobooks" -- confirm which site this scraper
    actually serves and name it here.

    Args:
        url (str): URL of one specific book's page. It is fetched with
            ``requests.get`` and also returned unchanged in the result.

    Returns:
        list: SiteBookData, 17 items in this fixed order::

            [format, book_title, book_image, book_image_url, isbn_13,
             description, series, volume_number, subtitle, authors,
             book_id, site_slug, parse_status, url, content,
             ready_for_sale, extra]

        ``content`` is the raw response body and ``extra`` is always
        ``None`` for this scraper.

    Raises:
        Nothing is caught here: any exception raised by ``requests.get``
        or by one of the helper methods propagates to the caller.
    """
    response = requests.get(url)
    content = response.content

    # The helper calls are kept in their original order in case any of
    # them depends on state set by an earlier one.
    book_title = self._get_book_title(content)
    book_image_url = self._get_book_image_url(content)
    book_image = Par_Scrape.get_book_image_from_image_url(book_image_url)
    isbn_13 = self._get_book_isbn_13()
    description = self._get_book_description(content)
    series = self._get_book_series()
    volume_number = self._get_book_volume()
    subtitle = self._get_book_subtitle()
    authors = self._get_book_authors(content)
    site_slug = self._get_book_site_slug()
    # The id is derived from the URL reported by the response object,
    # not from the page body.
    book_id = self._get_book_id(response.url)
    # Named book_format so the builtin format() is not shadowed.
    book_format = self._get_book_format()
    ready_for_sale = self._get_book_sale_status(content)
    extra = None

    # isbn_13, series, volume_number and subtitle are deliberately left out
    # of the parse-status check, exactly as before.
    parse_status = Par_Scrape.parse_status([
        book_format,
        book_title,
        book_image,
        book_image_url,
        description,
        authors,
        book_id,
        site_slug,
        url,
        content,
        ready_for_sale,
    ])

    return [
        book_format,
        book_title,
        book_image,
        book_image_url,
        isbn_13,
        description,
        series,
        volume_number,
        subtitle,
        authors,
        book_id,
        site_slug,
        parse_status,
        url,
        content,
        ready_for_sale,
        extra,
    ]
def get_book_data_from_site(self, url):
    """Parse a Kobo book page into a SiteBookData list.

    The url must point at one specific book. Per the original notes this
    works for both digital books and audio books.

    Args:
        url (str): Kobo book url to fetch with ``requests.get``.

    Returns:
        list: SiteBookData, 17 items in this fixed order::

            [format, book_title, book_image, book_image_url, isbn_13,
             description, series, volume_number, subtitle, authors,
             book_id, site_slug, parse_status, url, content,
             ready_for_sale, extra]

        The ``url`` slot holds the value extracted from the page by
        ``_get_book_url``, not the argument that was passed in.
        ``content`` is the raw response body and ``extra`` is an empty
        dict.
    """
    response = requests.get(url)
    page = response.content
    extra = {}

    # Field extraction, in the same order as the helpers were always called.
    title = self._get_book_title(page)
    image_url = self._get_book_image_url(page)
    image = Par_Scrape.get_book_image_from_image_url(image_url)
    isbn = self._get_book_isbn_13(page)
    blurb = self._get_book_description(page)
    series_name = self._get_book_series(page)
    volume = self._get_book_volume_number(page)
    sub = self._get_book_subtitle(page)
    writers = self._get_book_authors(page)
    page_url = self._get_book_url(page)
    slug = self._get_book_site_slug()
    identifier = self._get_book_id(page)
    kind = self._get_book_format(page)
    available = self._get_book_availability(page)

    # series, volume number and subtitle are not part of the status check.
    checked_fields = [
        kind, title, image, image_url, isbn, blurb,
        writers, identifier, slug, page_url, page, available,
    ]
    status = Par_Scrape.parse_status(checked_fields)

    return [
        kind, title, image, image_url, isbn, blurb,
        series_name, volume, sub, writers, identifier,
        slug, status, page_url, page, available, extra,
    ]
def get_book_data_from_site(self, url):
    """Fetch one book page and return its fields as a SiteBookData list.

    Args:
        url (str): address handed to ``self.__fetch__`` to obtain the page
            content. It is not returned; the ``url`` slot of the result is
            filled from ``self.__get_url__(content)`` instead.

    Returns:
        list: SiteBookData, 17 items in this fixed order::

            [format, book_title, book_image, book_image_url, isbn_13,
             description, series, volume_number, subtitle, authors,
             book_id, site_slug, parse_status, url, content,
             ready_for_sale, extra]

        For this scraper ``format`` is always ``'DIGITAL'``, ``site_slug``
        is always ``'LC'``, ``ready_for_sale`` is always ``True`` and
        ``series``, ``volume_number`` and ``extra`` are always ``None``.
    """
    content = self.__fetch__(url)

    # Constants for this site.
    book_format = 'DIGITAL'  # named book_format to avoid shadowing format()
    site_slug = 'LC'
    ready_for_sale = True

    # Fields this scraper never populates.
    series = None
    volume_number = None
    extra = None

    # Helper calls are kept in their original order.
    book_title = self.__get_title__(content)
    subtitle = self.__get_subtitle__(content)
    book_image_url = self.__get_book_image_url__(content)
    book_image = Par_Scrape.get_book_image_from_image_url(book_image_url)
    isbn_13 = self.__get_isbn__(content)
    description = self.__get_description__(content)
    authors = self.__get_authors__(content)
    book_id = self.__get_book_id__(content)
    book_url = self.__get_url__(content)

    # The subtitle only takes part in the parse-status check when one was
    # found; presumably so that a book without a subtitle is not reported
    # as an incomplete parse -- TODO confirm against Par_Scrape.parse_status.
    parse_list = [
        book_format,
        book_title,
        book_image,
        book_image_url,
        isbn_13,
        description,
    ]
    if subtitle is not None:
        parse_list.append(subtitle)
    parse_list.extend([
        authors,
        book_id,
        site_slug,
        book_url,
        content,
        ready_for_sale,
    ])

    parse_status = Par_Scrape.parse_status(parse_list)

    return [
        book_format,
        book_title,
        book_image,
        book_image_url,
        isbn_13,
        description,
        series,
        volume_number,
        subtitle,
        authors,
        book_id,
        site_slug,
        parse_status,
        book_url,
        content,
        ready_for_sale,
        extra,
    ]