def main():
    """Log in to Blinkist and build an epub for every book in every category.

    Books whose epub already exists on disk are skipped, so the script can
    be re-run to resume an interrupted download.
    """
    login(user=username, pwd=password)
    for cat in categories:
        # e.g. https://www.blinkist.com/en/nc/categories/entrepreneurship-and-small-business-en
        category_url = "https://www.blinkist.com/en/nc/categories/{cat}/books/".format(cat=cat)
        book_titles = get_books_for_category(url=category_url)
        total = len(book_titles)
        for pos, title in enumerate(book_titles, start=1):
            print("{}/{} - {}".format(pos, total, title))
            # Target path: one sub-directory per category, named after the title.
            # (title is NOT sanitized here — mirrors the original behaviour.)
            filename = './{cat}/{title}'.format(cat=cat, title=title)
            if os.path.isfile(filename + '.epub'):
                # Already downloaded on an earlier run — nothing to do.
                print('File already exists')
                continue
            book = ez_epub.Book()
            book.sections = []
            book = analytic_info_html(
                category=cat,
                book=book,
                url="https://www.blinkist.com/books/{title}/".format(title=title))
            book = analytic_content_html(
                book=book,
                url="https://www.blinkist.com/en/nc/reader/{title}/".format(title=title))
            book.make(filename)
            print('Created epub file: ' + filename)
def create_epub():
    """Interactively build an epub from the local file ``temp.txt``.

    Prompts on stdin for the book title and author (Spanish prompts are
    part of the original UI and kept verbatim), parses ``temp.txt`` into
    sections for chapters 1-100, and writes ``<title>.epub``.
    """
    title = raw_input('Titulo del libro: ')
    author = raw_input('Autor del libro: ')
    book = ez_epub.Book()
    book.title = title
    book.authors = [author]
    book.sections = epubCreator.parseBook(r'temp.txt', 1, 100)
    # FIX: book.title is already a str; the old ``r'%s' % book.title``
    # round-trip was a no-op and only obscured the intent.
    book.make(book.title)
def main():
    """Log in and convert every entry of ``book_urls`` into an epub file."""
    login(username=username, password=password)
    total = len(book_urls)
    for pos, book_url in enumerate(book_urls, start=1):
        title = extract_title_from_book_url(book_url)
        print("{}/{} - {}".format(pos, total, title))
        book = ez_epub.Book()
        book.sections = []
        info_url = "https://www.blinkist.com/en/books/{title}.html".format(title=title)
        book = analytic_info_html(book=book, url=info_url)
        reader_url = "https://www.blinkist.com/en/reader/{title}/".format(title=title)
        book = analytic_content_html(book=book, url=reader_url)
        print('Saving epub')
        # Strip characters that are not allowed in filenames before saving.
        # (Constant name — typo included — is defined elsewhere in the file.)
        safe_title = book.title.translate(ILLEAGAL_FILENAME_CHARACTERS)
        book.make('./{title}'.format(title=safe_title))
continue if p.__contains__("<xa"): p = p.replace("<xa", "<a") section.text.append(genshi.core.Markup(p)) else: for p in str(page).replace("\n", "<br/>").split("<br/><br/>"): p, error = tidy_fragment(p) if p.__contains__("<xa"): p = p.replace("<xa", "<a") section.text.append(genshi.core.Markup(p)) except Exception, e: print str(e) pass return section book = ez_epub.Book() book.title = "Paul Graham's Essays" book.authors = ['Paul Graham'] page = urllib2.urlopen('http://www.paulgraham.com/articles.html').read() soup = BeautifulSoup(page) soup.prettify() links = soup.findAll('table', {'width': '435'})[1].findAll('a') sections = [] for link in links: sections.append(addSection(link['href'], link.text)) book.sections = sections book.make(book.title)
def generate_epub(self):
    """Return an ``ez_epub.Book`` assembled from this object's metadata.

    Each chapter contributes one section via its own ``generate_epub``.
    """
    book = ez_epub.Book()
    book.title = self.title
    book.authors = [self.author]
    chapter_sections = []
    for ch in self.chapters:
        chapter_sections.append(ch.generate_epub())
    book.sections = chapter_sections
    return book
def download_story(story_url): # TODO verify input URL better story_id = story_url.split('/')[-1].split('-')[0] # TODO: probably use {'drafts': 0, 'include_deleted': 0} storyinfo_req = session.get(API_STORYINFO + story_id, params={ 'drafts': 1, 'include_deleted': 1 }) storyinfo_json = json.loads(storyinfo_req.content) story_title = storyinfo_json['title'] story_description = storyinfo_json['description'] story_createDate = dateutil.parser.parse(storyinfo_json['createDate']) story_modifyDate = dateutil.parser.parse(storyinfo_json['modifyDate']) story_author = storyinfo_json['user']['name'] story_categories = [ categories[c] for c in storyinfo_json['categories'] if c in categories ] # category can be 0 story_rating = storyinfo_json['rating'] # TODO: I think 4 is adult? story_cover = StringIO.StringIO( session.get(storyinfo_json['cover']).content) print 'Story "{story_title}": {story_id}'.format(story_title=story_title, story_id=story_id) # Setup epub book = ez_epub.Book() book.title = story_title book.authors = [story_author] book.sections = [] book.impl.add_cover(story_cover) book.impl.description = HTML( story_description, encoding='utf-8') # TODO: not sure if this is HTML or text book.impl.add_meta('publisher', 'Wattpad - scraped') book.impl.add_meta('source', story_url) for part in storyinfo_json['parts']: chapter_title = part['title'] if part['draft']: print 'Skipping "{chapter_title}": {chapter_id}, part is draft'.format( chapter_title=chapter_title, chapter_id=chapter_id) continue if 'deleted' in part and part['deleted']: print 'Skipping "{chapter_title}": {chapter_id}, part is deleted'.format( chapter_title=chapter_title, chapter_id=chapter_id) continue chapter_id = part['id'] # TODO: could intelligently only redownload modified parts chapter_modifyDate = dateutil.parser.parse(part['modifyDate']) print 'Downloading "{chapter_title}": {chapter_id}'.format( chapter_title=chapter_title, chapter_id=chapter_id) chapter_req = session.get(API_STORYTEXT, 
params={'id': chapter_id}) chapter_html = json.loads(chapter_req.content)['text'] section = ez_epub.Section() section.html = HTML(chapter_html, encoding='utf-8') section.title = chapter_title book.sections.append(section) print 'Saving epub' book.make(book.title + '.epub')
def download_story(story_id):
    """Download a Wattpad story by id and write it out as an epub.

    Draft and deleted parts are skipped. The output filename is the story
    title with illegal filename characters translated away.
    """
    # TODO: probably use {'drafts': 0, 'include_deleted': 0}
    storyinfo = session.get(API_STORYINFO + story_id,
                            params={'drafts': 1, 'include_deleted': 1}).json()

    story_title = storyinfo['title']
    story_description = storyinfo['description']
    story_createDate = dateutil.parser.parse(storyinfo['createDate'])
    story_modifyDate = dateutil.parser.parse(storyinfo['modifyDate'])
    story_author = storyinfo['user']['name']
    story_categories = [categories[c] for c in storyinfo['categories']
                        if c in categories]  # category can be 0
    story_rating = storyinfo['rating']  # TODO: I think 4 is adult?
    story_cover = io.BytesIO(session.get(storyinfo['cover']).content)
    story_url = storyinfo['url']

    print('Story "{story_title}": {story_id}'.format(story_title=story_title,
                                                     story_id=story_id))

    # Setup epub
    book = ez_epub.Book()
    book.title = story_title
    book.authors = [story_author]
    book.sections = []
    book.impl.addCover(fileobj=story_cover)
    book.impl.description = HTML(
        story_description, encoding='utf-8')  # TODO: not sure if this is HTML or text
    book.impl.url = story_url
    book.impl.addMeta('publisher', 'Wattpad - scraped')
    book.impl.addMeta('source', story_url)

    for part in storyinfo['parts']:
        chapter_title = part['title']
        # BUG FIX: chapter_id was previously assigned *after* the two skip
        # branches below, which interpolate it into their log messages —
        # a NameError on the first draft/deleted part (or a stale id from
        # the previous iteration). Read it up front instead.
        chapter_id = part['id']

        if part['draft']:
            print('Skipping "{chapter_title}": {chapter_id}, part is draft'.
                  format(chapter_title=chapter_title, chapter_id=chapter_id))
            continue

        if 'deleted' in part and part['deleted']:
            print('Skipping "{chapter_title}": {chapter_id}, part is deleted'.
                  format(chapter_title=chapter_title, chapter_id=chapter_id))
            continue

        # TODO: could intelligently only redownload modified parts
        chapter_modifyDate = dateutil.parser.parse(part['modifyDate'])

        print('Downloading "{chapter_title}": {chapter_id}'.format(
            chapter_title=chapter_title, chapter_id=chapter_id))

        chapter_html = session.get(API_STORYTEXT,
                                   params={'id': chapter_id,
                                           'output': 'json'}).json()['text']
        # Typographic quotes/dashes for nicer epub text.
        chapter_html = smartypants.smartypants(chapter_html)

        section = ez_epub.Section()
        section.html = HTML(chapter_html, encoding='utf-8')
        section.title = chapter_title
        book.sections.append(section)

    print('Saving epub')
    book.make('./{title}'.format(
        title=book.title.translate(ILLEAGAL_FILENAME_CHARACTERS)))