Exemplo n.º 1
0
def main():
    """Log in to Blinkist and build an EPUB for every book in every category."""
    login(user=username, pwd=password)

    for cat in categories:
        # e.g. https://www.blinkist.com/en/nc/categories/entrepreneurship-and-small-business-en
        category_url = "https://www.blinkist.com/en/nc/categories/{cat}/books/".format(cat=cat)
        book_titles = get_books_for_category(url=category_url)
        total = len(book_titles)

        for index, title in enumerate(book_titles):
            print("{}/{} - {}".format(index + 1, total, title))

            # Skip titles whose epub is already on disk.
            filename = './{cat}/{title}'.format(cat=cat, title=title)
            if os.path.isfile(filename + '.epub'):
                print('File already exists')
                continue

            book = ez_epub.Book()
            book.sections = []
            info_url = "https://www.blinkist.com/books/{title}/".format(title=title)
            book = analytic_info_html(category=cat, book=book, url=info_url)
            reader_url = "https://www.blinkist.com/en/nc/reader/{title}/".format(title=title)
            book = analytic_content_html(book=book, url=reader_url)
            book.make(filename)
            print('Created epub file: ' + filename)
Exemplo n.º 2
0
def create_epub():
    """Prompt for a title and author, then build an EPUB from temp.txt."""
    book_title = raw_input('Titulo del libro: ')
    book_author = raw_input('Autor del libro: ')

    epub = ez_epub.Book()
    epub.title = book_title
    epub.authors = [book_author]
    # parseBook(path, first_chapter, last_chapter)
    epub.sections = epubCreator.parseBook(r'temp.txt', 1, 100)
    epub.make(r'%s' % epub.title)
Exemplo n.º 3
0
def main():
    """Log in to Blinkist and convert every configured book URL into an EPUB."""
    login(username=username, password=password)

    total = len(book_urls)
    for index, book_url in enumerate(book_urls, start=1):
        title = extract_title_from_book_url(book_url)
        print("{}/{} - {}".format(index, total, title))

        book = ez_epub.Book()
        book.sections = []
        info_url = "https://www.blinkist.com/en/books/{title}.html".format(title=title)
        book = analytic_info_html(book=book, url=info_url)
        reader_url = "https://www.blinkist.com/en/reader/{title}/".format(title=title)
        book = analytic_content_html(book=book, url=reader_url)

        print('Saving epub')
        # Strip characters that are illegal in filenames before saving.
        safe_title = book.title.translate(ILLEAGAL_FILENAME_CHARACTERS)
        book.make('./{title}'.format(title=safe_title))
Exemplo n.º 4
0
                    continue
                if p.__contains__("<xa"):
                    p = p.replace("<xa", "<a")
                section.text.append(genshi.core.Markup(p))
        else:
            for p in str(page).replace("\n", "<br/>").split("<br/><br/>"):
                p, error = tidy_fragment(p)
                if p.__contains__("<xa"):
                    p = p.replace("<xa", "<a")
                section.text.append(genshi.core.Markup(p))
    except Exception, e:
        print str(e)
        pass
    return section


# Build an EPUB of Paul Graham's essays scraped from paulgraham.com.
book = ez_epub.Book()
book.title = "Paul Graham's Essays"
book.authors = ['Paul Graham']

page = urllib2.urlopen('http://www.paulgraham.com/articles.html').read()
soup = BeautifulSoup(page)
soup.prettify()

# The second fixed-width (435px) table on the index page holds the essay links.
links = soup.findAll('table', {'width': '435'})[1].findAll('a')
book.sections = [addSection(link['href'], link.text) for link in links]
book.make(book.title)
 def generate_epub(self):
     """Assemble this book's chapters into an ez_epub.Book and return it.

     The returned book is not written to disk; the caller decides when
     to call make().
     """
     epub = ez_epub.Book()
     epub.title = self.title
     epub.authors = [self.author]
     sections = []
     for chapter in self.chapters:
         sections.append(chapter.generate_epub())
     epub.sections = sections
     return epub
Exemplo n.º 6
0
def download_story(story_url):
    # TODO verify input URL better
    story_id = story_url.split('/')[-1].split('-')[0]

    # TODO: probably use {'drafts': 0, 'include_deleted': 0}
    storyinfo_req = session.get(API_STORYINFO + story_id,
                                params={
                                    'drafts': 1,
                                    'include_deleted': 1
                                })
    storyinfo_json = json.loads(storyinfo_req.content)

    story_title = storyinfo_json['title']
    story_description = storyinfo_json['description']
    story_createDate = dateutil.parser.parse(storyinfo_json['createDate'])
    story_modifyDate = dateutil.parser.parse(storyinfo_json['modifyDate'])
    story_author = storyinfo_json['user']['name']
    story_categories = [
        categories[c] for c in storyinfo_json['categories'] if c in categories
    ]  # category can be 0
    story_rating = storyinfo_json['rating']  # TODO: I think 4 is adult?
    story_cover = StringIO.StringIO(
        session.get(storyinfo_json['cover']).content)

    print 'Story "{story_title}": {story_id}'.format(story_title=story_title,
                                                     story_id=story_id)

    # Setup epub
    book = ez_epub.Book()
    book.title = story_title
    book.authors = [story_author]
    book.sections = []
    book.impl.add_cover(story_cover)
    book.impl.description = HTML(
        story_description,
        encoding='utf-8')  # TODO: not sure if this is HTML or text
    book.impl.add_meta('publisher', 'Wattpad - scraped')
    book.impl.add_meta('source', story_url)

    for part in storyinfo_json['parts']:
        chapter_title = part['title']

        if part['draft']:
            print 'Skipping "{chapter_title}": {chapter_id}, part is draft'.format(
                chapter_title=chapter_title, chapter_id=chapter_id)
            continue

        if 'deleted' in part and part['deleted']:
            print 'Skipping "{chapter_title}": {chapter_id}, part is deleted'.format(
                chapter_title=chapter_title, chapter_id=chapter_id)
            continue

        chapter_id = part['id']

        # TODO: could intelligently only redownload modified parts
        chapter_modifyDate = dateutil.parser.parse(part['modifyDate'])

        print 'Downloading "{chapter_title}": {chapter_id}'.format(
            chapter_title=chapter_title, chapter_id=chapter_id)

        chapter_req = session.get(API_STORYTEXT, params={'id': chapter_id})
        chapter_html = json.loads(chapter_req.content)['text']

        section = ez_epub.Section()
        section.html = HTML(chapter_html, encoding='utf-8')
        section.title = chapter_title
        book.sections.append(section)

    print 'Saving epub'
    book.make(book.title + '.epub')
Exemplo n.º 7
0
def download_story(story_id):
    """Scrape a Wattpad story by numeric id and save it as an EPUB.

    The epub is written to the current directory, named after the story
    title with illegal filename characters stripped.
    """
    # TODO: probably use {'drafts': 0, 'include_deleted': 0}
    storyinfo = session.get(API_STORYINFO + story_id,
                            params={
                                'drafts': 1,
                                'include_deleted': 1
                            }).json()

    story_title = storyinfo['title']
    story_description = storyinfo['description']
    story_createDate = dateutil.parser.parse(storyinfo['createDate'])
    story_modifyDate = dateutil.parser.parse(storyinfo['modifyDate'])
    story_author = storyinfo['user']['name']
    story_categories = [
        categories[c] for c in storyinfo['categories'] if c in categories
    ]  # category can be 0
    story_rating = storyinfo['rating']  # TODO: I think 4 is adult?
    story_cover = io.BytesIO(session.get(storyinfo['cover']).content)
    story_url = storyinfo['url']

    print('Story "{story_title}": {story_id}'.format(story_title=story_title,
                                                     story_id=story_id))

    # Setup epub
    book = ez_epub.Book()
    book.title = story_title
    book.authors = [story_author]
    book.sections = []
    book.impl.addCover(fileobj=story_cover)
    book.impl.description = HTML(
        story_description,
        encoding='utf-8')  # TODO: not sure if this is HTML or text
    book.impl.url = story_url
    book.impl.addMeta('publisher', 'Wattpad - scraped')
    book.impl.addMeta('source', story_url)

    for part in storyinfo['parts']:
        chapter_title = part['title']
        # BUGFIX: bind chapter_id before the draft/deleted skip messages
        # below use it; previously those prints raised NameError on a
        # first-iteration skip (or reused a stale id from the prior part).
        chapter_id = part['id']

        if part['draft']:
            print('Skipping "{chapter_title}": {chapter_id}, part is draft'.
                  format(chapter_title=chapter_title, chapter_id=chapter_id))
            continue

        if 'deleted' in part and part['deleted']:
            print('Skipping "{chapter_title}": {chapter_id}, part is deleted'.
                  format(chapter_title=chapter_title, chapter_id=chapter_id))
            continue

        # TODO: could intelligently only redownload modified parts
        chapter_modifyDate = dateutil.parser.parse(part['modifyDate'])

        print('Downloading "{chapter_title}": {chapter_id}'.format(
            chapter_title=chapter_title, chapter_id=chapter_id))

        chapter_html = session.get(API_STORYTEXT,
                                   params={
                                       'id': chapter_id,
                                       'output': 'json'
                                   }).json()['text']
        chapter_html = smartypants.smartypants(chapter_html)

        section = ez_epub.Section()
        section.html = HTML(chapter_html, encoding='utf-8')
        section.title = chapter_title
        book.sections.append(section)

    print('Saving epub')
    book.make('./{title}'.format(
        title=book.title.translate(ILLEAGAL_FILENAME_CHARACTERS)))