Exemplo n.º 1
0
def get_book(initial_url):
    """Scrape a Wattpad story and write it to an EPUB in the working directory.

    Author, title, description and tags are read from the story page at
    ``initial_url``; the chapter list is read from ``<initial_url>/parts``.
    The book is written to ``"<title> - <author>.epub"``. When that file
    already exists no chapters are fetched and nothing is written.

    Args:
        initial_url: URL of the story's landing page.
    """
    base_url = 'http://www.wattpad.com'
    html = get_html(initial_url)

    # Get basic book information
    author = html.select('div.author-info strong a')[0].get_text()
    title = html.select('h1')[0].get_text().strip()
    description = html.select('h2.description')[0].get_text()
    # Only needed by the disabled cover code below; kept so a missing cover
    # element still fails the same way as before.
    coverurl = html.select('div.cover.cover-lg img')[0]['src']
    labels = ['Wattpad']
    labels.extend(label.get_text()
                  for label in html.select('div.tags a')
                  if '/' in label['href'])

    print("'{}' by {}".format(title, author))

    # Get list of chapters
    chapterlist_url = "{}{}".format(initial_url, "/parts")
    chapterlist = get_html(chapterlist_url).select('ul.table-of-contents a')

    epubfile = "{} - {}.epub".format(title, author)
    if os.path.exists(epubfile):
        print("Epub file already exists, not updating")
        return

    book = epub.EpubBook()
    book.set_title(title)
    book.add_author(author)
    book.set_language('en')
    for label in labels:
        book.add_metadata('DC', 'subject', label)
    # TODO: add a cover without breaking everything
    # urllib.request.urlretrieve(coverurl, "cover.jpg")
    # img = open("cover.jpg", "r", encoding="utf-8")
    # book.set_cover('cover.jpg', img)
    # os.remove("cover.jpg")

    # Define CSS style. Files are read inside ``with`` so the handles are
    # closed immediately instead of being left to the garbage collector.
    with open("CSS/nav.css") as css_file:
        nav_css = epub.EpubItem(uid="style_nav",
                                file_name="Style/nav.css",
                                media_type="text/css",
                                content=css_file.read())
    with open("CSS/body.css") as css_file:
        body_css = epub.EpubItem(uid="style_body",
                                 file_name="Style/body.css",
                                 media_type="text/css",
                                 content=css_file.read())
    # Add CSS file
    book.add_item(nav_css)
    book.add_item(body_css)

    # Introduction page, rendered from the HTML template
    intro_ch = epub.EpubHtml(title='Introduction', file_name='intro.xhtml')
    intro_ch.add_item(body_css)
    with open("HTML/intro.xhtml") as template_file:
        intro_template = Template(template_file.read())
    intro_ch.content = intro_template.substitute(title=title,
                                                 author=author,
                                                 url=initial_url,
                                                 synopsis=description)
    book.add_item(intro_ch)

    allchapters = []
    for item in chapterlist:
        chaptertitle = item.get_text().strip().replace("/", "-")
        # Entries titled "A/N" (normalised to "A-N" above) are skipped.
        if chaptertitle.upper() == "A-N":
            continue
        print("Working on: {}".format(chaptertitle))
        chapter = get_chapter("{}{}".format(base_url, item['href']))
        book.add_item(chapter)
        allchapters.append(chapter)

    # Define Table of Contents
    book.toc = (epub.Link('intro.xhtml', 'Introduction', 'intro'),
                (epub.Section('Chapters'), allchapters))

    # Add default NCX and Nav file
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # Basic spine: introduction, navigation, then every chapter in order
    book.spine = [intro_ch, 'nav'] + allchapters

    # Write the epub to file
    epub.write_epub(epubfile, book, {})
Exemplo n.º 2
0
def make_epub_html(filename, title, contents):
    """Create an ``epub.EpubHtml`` page (language ``zh``) for one chapter.

    The page body is an ``<h2>`` heading holding ``title`` followed by
    ``contents`` after it has been passed through ``trans_quote``.

    Args:
        filename: file name of the page inside the EPUB.
        title: chapter title, used for the page title and the heading.
        contents: chapter text handed to ``trans_quote``.

    Returns:
        The populated ``epub.EpubHtml`` item.
    """
    page = epub.EpubHtml(title=title, file_name=filename, lang='zh')
    heading = '<h2>{}</h2>'.format(title)
    page.content = heading + trans_quote(contents)
    return page
Exemplo n.º 3
0
def write_epub(user_slug, doc_slug, file_path):
    """Render one wiki document as an EPUB and write it to ``file_path``.

    The book consists of a generated cover image, an "About this book"
    title page and a single chapter holding the processed document.

    Args:
        user_slug: slug of the user owning the document.
        doc_slug: slug of the document to export.
        file_path: destination path of the EPUB file.

    Raises:
        RuntimeError: if the user or the document cannot be found.
    """
    # Get all the data
    config = load_env_config()
    data = Data(config)

    user = data.user_get(user_slug)  # or None
    if not user:
        # Format the message; passing the slug as a second argument left
        # the "%s" placeholder unexpanded in the exception text.
        raise RuntimeError("User not found: %s" % user_slug)

    document = data.userDocument_get(user_slug, doc_slug)  # or None
    if not document:
        raise RuntimeError("Document not found: %s" % doc_slug)

    # -------------------------
    # 0. Create book
    # 1. Create cover
    # 2. Create title page
    # 3. Create chapter (which basically is the book)
    #    ... This upgrades to multiple chapters when compiling books.

    # Pre-processing...

    settings = Settings({
        'config:user': user_slug,
        'config:document': doc_slug,
    })
    wiki = Wiki(settings)
    xhtml = wiki.process(user_slug, doc_slug, document)
    metadata = wiki.compile_metadata(config['TIME_ZONE'], user_slug, doc_slug)
    # No trailing comma here: it used to turn the URL into a 1-tuple.
    metadata['url'] = '/read/{:s}/{:s}'.format(user_slug, doc_slug)

    title = metadata.get('title', 'Untitled')
    summary = metadata.get('summary', '')
    author = metadata.get('author', 'Anonymous')
    date = metadata.get('date', '')

    # -------------------------
    # 0. CREATE BOOK

    book = epub.EpubBook()

    # set metadata
    book.set_identifier(user_slug + '+' + doc_slug)
    book.set_title(title)
    book.set_language('en')
    book.add_author(author)

    # define CSS style
    with open('static/epub.css') as f:
        style = f.read()
    global_css = epub.EpubItem(uid="style_nav",
                               file_name="style/nav.css",
                               media_type="text/css",
                               content=style)
    book.add_item(global_css)

    # -------------------------
    # 1. Create Cover

    # NOTE(review): the temp file is named ".png" but is saved as JPEG and
    # is never removed — confirm whether that is intended.
    tmp_cover_file = "/tmp/%s-%s-cover.png" % (user_slug, doc_slug)
    image = make_background((1600, 2200), (160, 184, 160))
    cover = make_cover(image, [title, summary, author, date],
                       [COLOR_TEXT, COLOR_SHADOW])
    cover.save(tmp_cover_file, "JPEG")
    chapter_file_name = doc_slug + '.xhtml'

    assert os.path.exists(tmp_cover_file)
    # Read through a context manager so the handle is closed right away.
    with open(tmp_cover_file, 'rb') as cover_file:
        cover_image = cover_file.read()
    book.set_cover("image.jpg", cover_image)

    # -------------------------
    # 2. Create Title Page

    date_string = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    title_xhtml = """
    <html>
    <body>
        <div>Generated by <i>Article Wiki</i>:</div>
        <div>%s</div>
        <div>&nbsp;</div>
        <div>Permanent URL:</div>
        <div>http://chapman.wiki/read/%s/%s</div>
    </body>
    </html>
    """ % (date_string, user_slug, doc_slug)

    c1 = epub.EpubHtml(title="About this book",
                       file_name="title.xhtml",
                       lang='en')
    c1.content = title_xhtml
    c1.add_item(global_css)
    book.add_item(c1)

    # -------------------------
    # 3. Create Chapter

    c2 = epub.EpubHtml(title=title, file_name=chapter_file_name, lang='en')
    c2.content = xhtml
    c2.add_item(global_css)
    book.add_item(c2)

    # Define Table Of Contents
    book.toc = (
        epub.Link(chapter_file_name, title, doc_slug),
        # (epub.Section(user_slug), (c2))
    )

    # add default NCX and Nav file
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # basic spine
    book.spine = ['nav', c1, c2]

    # write to the file
    epub.write_epub(file_path, book, {})
Exemplo n.º 4
0
def _add_zip_images(zipped, image_dir, page_prefix, section_items, spine,
                    cover_needed):
    """Add every member of an open zip archive to the global ``book``.

    Each member is stored as an image item under ``image_dir`` and gets a
    one-image HTML page named ``<page_prefix><member>.html``. The page and
    the image are appended to ``section_items``; the page is appended to
    ``spine``. When ``cover_needed`` is true the first member processed
    becomes the book cover.

    Returns:
        ``True`` if a cover is still needed afterwards, else ``False``.
    """
    # Sort by name length first, then by name.
    members = sorted(zipped.filelist,
                     key=lambda member: (len(member.filename),
                                         member.filename))
    for member in members:
        data = zipped.read(member)
        logger.info("添加文件%s, 文件大小%sKB" %
                    (member.filename, len(data) // 1000))
        image_path = "%s/%s" % (image_dir, member.filename)
        img = epub.EpubItem(file_name=image_path,
                            media_type="image/%s" %
                            os.path.splitext(member.filename)[-1][1:],
                            content=data)
        if cover_needed:
            cover_needed = False
            book.set_cover('cover.jpg', data)

        page = epub.EpubHtml(title=member.filename,
                             file_name='%s%s.html' %
                             (page_prefix, member.filename))
        page.set_content(("<img src=\"%s\">" % image_path).encode())
        section_items.append(page)
        section_items.append(img)
        spine.append(page)
        book.add_item(page)
        book.add_item(img)
    return cover_needed


def parse():
    """Build an EPUB from the zip archive or directory named by ``source``.

    Reads the module globals ``source`` (input path), ``out`` (output path,
    derived from the input name when empty) and ``book`` (the EPUB being
    filled). A directory input contributes one table-of-contents section
    per ``.zip`` file it contains; a file input must itself be a ``.zip``.

    Returns without doing anything if ``source`` does not exist; otherwise
    the process is terminated with ``sys.exit()`` once the EPUB is written
    (or when the input file is not a zip).
    """
    global source, out, book
    if not os.path.exists(source):
        logger.error('%s 文件不存在!' % source)
        return

    logger.info('开始解析')
    filename = os.path.split(source)[-1]
    if out == '':
        out = os.path.splitext(filename)[0] + '.epub'

    # Table of contents
    toc = [
        (epub.Section(filename), []),
    ]
    # Reading order
    spine = ['nav']
    cover_needed = True

    # All filesystem access goes through ``source``: using only its base
    # name (as before) broke inputs located outside the working directory.
    if os.path.isdir(source):
        # Input is a directory
        logger.info('输入了文件夹 %s' % filename)
        # Build a filtered list instead of removing entries while
        # iterating, which silently skipped the entry after each removal.
        filelist = [name for name in os.listdir(source)
                    if os.path.splitext(name.lower())[-1] == '.zip']

        # Sort by name length first, then by name
        filelist.sort(key=lambda k: (len(k), k))

        for myzipfile in filelist:
            logger.info('输入了文件 %s' % myzipfile)
            myzipfilename = myzipfile.split('.')[0]
            toc.append((epub.Section(myzipfilename), []))
            with zipfile.ZipFile(os.path.join(source, myzipfile),
                                 'r') as zipped:
                cover_needed = _add_zip_images(
                    zipped, "images/%s" % myzipfilename,
                    '%s_' % myzipfile, toc[-1][1], spine, cover_needed)
    else:
        # Input is a single zip file
        logger.info('输入了文件 %s' % filename)
        if os.path.splitext(filename.lower())[-1] != '.zip':
            logger.error('不是zip文件')
            sys.exit()
        with zipfile.ZipFile(source, 'r') as zipped:
            cover_needed = _add_zip_images(zipped, "images", '',
                                           toc[-1][1], spine, cover_needed)

    book.toc = toc

    # add navigation files
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # create spine
    book.spine = spine
    epub.write_epub(out, book)

    sys.exit()
Exemplo n.º 5
0
def createEbook(grimoireData):
    """Write ``destinyGrimoire.epub`` from the given grimoire data.

    Every card becomes one page containing its name, intro, cropped image
    and description. The table of contents is nested theme -> page -> card.

    Args:
        grimoireData: mapping with a ``"themes"`` list; each theme has
            ``"themeName"`` and ``"pages"``, each page has ``"pageName"``
            and ``"cards"``, and each card has ``"cardName"``,
            ``"cardIntro"``, ``"cardDescription"`` and an ``"image"``
            mapping (``sourceImage``, ``regionXStart``, ``regionYStart``,
            ``regionWidth``, ``regionHeight``).
    """
    book = epub.EpubBook()

    book.set_identifier('destinyGrimoire')
    book.set_title('Destiny Grimoire')
    book.set_language('en')
    book.add_author('Bungie')
    with open('cover.jpg', 'rb') as cover_file:
        book.set_cover("cover.jpg", cover_file.read())

    style = '''    
	cardname {
		display: block;
    	text-align: center;
    	font-size:150%;
    }
  	cardimage {
  		float: left;
  		margin-right: 5%;
  		width: 40%;
  		height: 40%;
  	}
  	cardintro {
  		display: block;
  		padding: 5%;
  	}
  	carddescription {}
  	container {
  		width: 100%;
  		clear: both;
  	}
  	'''

    default_css = epub.EpubItem(uid="style_default",
                                file_name="style/default.css",
                                media_type="text/css",
                                content=style)
    book.add_item(default_css)

    book.spine = ['nav']

    # Running card number across all themes and pages; it keeps the
    # generated file names unique.
    counter = 1
    tocSections = []
    for theme in grimoireData["themes"]:
        themePages = []
        for page in theme["pages"]:
            pageCards = []
            for card in page["cards"]:
                bookPage = epub.EpubHtml(
                    title=chapterTitle(card["cardName"]),
                    file_name=chapterPageFile(card["cardName"], counter),
                    lang='en',
                    content="")
                bookPage.add_item(default_css)
                imageBaseFileName = '%s_img' % (chapterBaseFileName(
                    card["cardName"], counter))
                imagePath = createCardImage(
                    imageBaseFileName,
                    'images/%s' %
                    (os.path.basename(card["image"]["sourceImage"])),
                    card["image"]["regionXStart"],
                    card["image"]["regionYStart"],
                    card["image"]["regionWidth"],
                    card["image"]["regionHeight"])
                with open(imagePath, 'rb') as image_file:
                    book.add_item(
                        epub.EpubItem(uid=imageBaseFileName,
                                      file_name=imagePath,
                                      content=image_file.read()))
                # The opening <cardname> and <carddescription> tags used to
                # carry a stray '"' that made the markup malformed.
                bookPage.content = u'''	<cardname>%s</cardname>
											<cardintro>%s</cardintro>
											<container>
												<cardimage><img src="%s"/></cardimage>
												<carddescription>%s</carddescription>
											</container>''' % (card["cardName"], safeValue(card["cardIntro"]),
                              imagePath, safeValue(card["cardDescription"]))
                book.add_item(bookPage)
                pageCards.append(bookPage)
                book.spine.append(bookPage)
                counter += 1

            themePages.append(
                (epub.Section(page["pageName"]), tuple(pageCards)))

        tocSections.append(
            (epub.Section(theme["themeName"]), tuple(themePages)))

    # Collected in lists (linear time) but handed over as tuples, the
    # shape the table of contents had before.
    book.toc = tuple(tocSections)

    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    epub.write_epub('destinyGrimoire.epub', book)
Exemplo n.º 6
0
async def createEpub(link, channel):
    """Download a story and write it to ``Books/<title>.epub``.

    Book metadata is taken from ``reqJson(link + "1")``; every chapter is
    fetched by ``worker`` in its own thread, and the introduction page is
    built from ``reqJson(link)``.

    Args:
        link: base URL; chapter numbers are appended to it.
        channel: not used by this function.
    """
    chapters = {}
    chapter = reqJson(link + "1")
    print(link + "1")
    info = chapter['info']

    book = epub.EpubBook()
    # set metadata
    book.set_identifier(info['urlId'])
    book.set_title(info['title'])
    book.set_language('en')
    book.add_author(info['author'])

    # One thread per chapter; each worker stores its result in ``chapters``.
    threads = []
    for i in range(1, int(info['chapters']) + 1):
        t = threading.Thread(target=worker, args=(book, i, chapters, link))
        threads.append(t)
        t.start()
    for thread in threads:
        thread.join()

    # Sort once by key; the ordered mapping is reused for the TOC and spine.
    chapters = collections.OrderedDict(sorted(chapters.items()))
    for c in chapters.values():
        print(c.title)
        book.add_item(c)

    print("requesting intro_page")
    intro_page = reqJson(link)
    intro = epub.EpubHtml(title='Introduction', file_name='introduction' + '.xhtml', lang='hr')
    intro.content = """
    <html>
    <head>
        <title>Introduction</title>
        <link rel="stylesheet" href="style/main.css" type="text/css" />
    </head>
    <body>
        <h1>%s</h1>
        <p><b>By: %s</b></p>
        <p>%s</p>
    </body>
    </html>
    """ % (intro_page['title'], intro_page['author'], intro_page['desc'])
    book.add_item(intro)

    # define Table Of Contents
    book.toc = (epub.Link('introduction.xhtml', 'Introduction', 'intro'),
                (epub.Section('rest of the beautiful owl'),
                 list(chapters.values()))
                )

    # add NCX file
    book.add_item(epub.EpubNcx())

    # define CSS style
    style = 'BODY {color: white;}'
    nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)

    # add CSS file
    book.add_item(nav_css)

    # Document stylesheet referenced by the intro page and the nav page.
    with open("style.css") as css_file:
        doc_style = epub.EpubItem(
            uid="doc_style",
            file_name="style/main.css",
            media_type="text/css",
            content=css_file.read()
        )
    # The stylesheet must be part of the book, otherwise the links to
    # style/main.css point at a file missing from the package.
    book.add_item(doc_style)

    # Single navigation document. A default EpubNav() used to be added as
    # well, which put two nav items into the book.
    nav_page = epub.EpubNav(uid='book_toc', file_name='toc.xhtml')
    nav_page.add_item(doc_style)
    book.add_item(nav_page)

    # basic spine
    book.spine = [intro, nav_page] + list(chapters.values())

    epub_name = intro_page['title'].replace('/', '_') + '.epub'
    print("creating book with name: " + epub_name)
    os.makedirs('Books', exist_ok=True)
    epub.write_epub("Books/" + epub_name, book, {})
Exemplo n.º 7
0
        book.add_author(author)

        header_name = os.path.join(
            savePath,
            list(
                filter(
                    re.compile(r'.+header\..+').match,
                    os.listdir(savePath)))[0])
        book.set_cover("cover.jpg", open(header_name, 'rb').read())

        book.toc = []
        book.spine = ['cover']

        # create about page
        chapter = epub.EpubHtml(title='about', file_name='about.xhtml')
        chapter.content = f'<h1>About</h1><p>Title: {name}</p><p>Author: {book.metadata["http://purl.org/dc/elements/1.1/"]["creator"][0][0]}</p><p>Source: <a href="{"https://tapas.io/series/" + urlName}">{"https://tapas.io/series/" + urlName}</a></p>'

        book.add_item(chapter)
        book.spine.append(chapter)

        # Append nav page
        book.spine.append('nav')

        # create chapters
        for pageCount, pageData in enumerate(data):
            printLine(
                'Downloaded page {}/{}...'.format(pageCount + 1, len(data)),
                True)

            pagePq = pq(
Exemplo n.º 8
0
from ebooklib import epub

# Minimal ebooklib example: a one-chapter book written to test.epub.
book = epub.EpubBook()

# Book metadata
book.set_identifier('test123')
book.set_title('Test book')
book.set_language('zh-TW')

# Single chapter. The body must be assigned to ``content``; the previous
# ``connect`` attribute was a typo that left the chapter empty.
c1 = epub.EpubHtml(title='Chapter01', file_name='chap_01.xhtml', lang='zh-TW')
c1.content = u'<h1>Chapter01</h1><p>This is chapter 01 for test</p>'

book.add_item(c1)

# Table of contents: a direct link plus a section containing the chapter
book.toc = (epub.Link('chap_01.xhtml', 'Chapter01', 'ch01'), (epub.Section('Simple Book'), (c1, )))

# Default NCX and Nav files
book.add_item(epub.EpubNcx())
book.add_item(epub.EpubNav())

# Stylesheet item
style = 'BODY {color: white;}'
nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)

book.add_item(nav_css)

# Reading order
book.spine = ['nav', c1]

epub.write_epub('test.epub', book, {})
def get_book(initial_url):
    """Scrape a Wattpad story and write it to ``./books`` as an EPUB.

    Author, title, description, cover URL, tags and the chapter list are
    read from the story page at ``initial_url``. The output file is
    ``./books/<sanitised title> - <author>.epub``; if it already exists no
    chapters are fetched and nothing is written.

    Args:
        initial_url: URL of the story's landing page.

    Returns:
        Path of the EPUB file, whether newly written or already present.
    """
    base_url = 'http://www.wattpad.com'
    html = get_html(initial_url)

    # Get basic book information
    author = html.select(
        'div.author-info:nth-child(1) > div:nth-child(2) > a')[0].get_text()
    title = html.select('.story-info__title')[0].get_text().strip()
    description = html.select('.description-text')[0].get_text()
    coverurl = html.select('.story-cover > img')[0]['src']
    labels = ['Wattpad']
    labels.extend(label.get_text()
                  for label in html.select('div.tags a')
                  if '/' in label['href'])
    if debug:
        print("Author: " + author)
        print("Title: " + title)
        print("Description: " + description)
        print("Cover: " + coverurl)
        print("Labels:" + " ".join(labels))

    print("'{}' by {}".format(title, author).encode("utf-8"))

    # Get list of chapters
    chapterlist = html.select('.story-parts')[0].select('ul:nth-child(1) li a')

    # Remove from the file name those characters that Microsoft does NOT allow.
    # This also affects the FAT filesystem used on most phone/tablet sdcards
    # and other devices used to read epub files.
    # Disallowed characters: \/:*?"<>|^
    filename = title.translate(str.maketrans('', '', '\\/:*?"<>|^'))
    # Apple products disallow files starting with dot
    filename = filename.lstrip('.')

    epubfile = "./books/{} - {}.epub".format(filename, author)
    if os.path.exists(epubfile):
        print("Epub file already exists, not updating")
        return epubfile

    book = epub.EpubBook()
    book.set_identifier("wattpad.com//%s/%s" %
                        (initial_url.split('/')[-1], len(chapterlist)))
    book.set_title(title)
    book.add_author(author)
    book.set_language('en')
    for label in labels:
        book.add_metadata('DC', 'subject', label)

    # Add a cover if it's available. ``cover`` must be bound either way:
    # it used to be assigned only on success, so building the spine raised
    # UnboundLocalError whenever the cover could not be fetched.
    cover = False
    if get_cover(coverurl):
        cover = True
        with open('cover.jpg', 'rb') as cover_file:
            book.set_cover(file_name='cover.jpg',
                           content=cover_file.read(),
                           create_page=True)
        os.remove('cover.jpg')

    # Define CSS style (files are closed as soon as they are read)
    with open(os.path.join("./utils/css", "nav.css")) as css_file:
        nav_css = epub.EpubItem(uid="style_nav",
                                file_name="Style/nav.css",
                                media_type="text/css",
                                content=css_file.read())
    with open(os.path.join("./utils/css", "body.css")) as css_file:
        body_css = epub.EpubItem(uid="style_body",
                                 file_name="Style/body.css",
                                 media_type="text/css",
                                 content=css_file.read())
    # Add CSS file
    book.add_item(nav_css)
    book.add_item(body_css)

    # Introduction page, rendered from the HTML template
    intro_ch = epub.EpubHtml(title='Introduction', file_name='intro.xhtml')
    intro_ch.add_item(body_css)
    with open(os.path.join("./utils/html", "intro.xhtml")) as template_file:
        intro_template = Template(template_file.read())
    intro_ch.content = intro_template.substitute(title=title,
                                                 author=author,
                                                 url=initial_url,
                                                 synopsis=description)
    book.add_item(intro_ch)

    allchapters = []
    # ``i`` is the 1-based position in the chapter list, passed on to
    # get_chapter(); enumerate() advances it, no manual increment needed.
    for i, item in enumerate(chapterlist, start=1):
        chaptertitle = item.get_text().strip().replace("/", "-")
        # Entries titled "A/N" (normalised to "A-N" above) are skipped.
        if chaptertitle.upper() == "A-N":
            continue
        print("Working on: {}".format(chaptertitle).encode("utf-8"))
        chapter = get_chapter("{}{}".format(base_url, item['href']), i)
        book.add_item(chapter)
        allchapters.append(chapter)

    # Define Table of Contents
    book.toc = (epub.Link('intro.xhtml', 'Introduction', 'intro'),
                (epub.Section('Chapters'), allchapters))

    # Add default NCX and Nav file
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # Basic spine: optional cover, introduction, navigation, chapters
    myspine = []
    if cover:
        myspine.append('cover')
    myspine.extend([intro_ch, 'nav'])
    myspine.extend(allchapters)
    book.spine = myspine

    # Write the epub to file
    epub.write_epub(epubfile, book, {})
    return epubfile
Exemplo n.º 10
0
    ctr = 0
    for k, v in chapter_d.items():
        ctr += 1
        ch_name = v['chapter']
        book = v['book']
        # name and roman numeral?
        name = ch_name.split(' ')[0].lower().encode()
        rest = ch_name.split(' ')[-1]
        if name in names_b and rest in roman_numerals:
            chapter_ct = rom_d[rest]
        else:
            name = ch_name.lower().encode()
            chapter_ct = 0
        new_ch_name = ch_name + ' ' + v['book']
        this_link = new_ch_name.replace(' ', '_') + '.html'
        this_chapter = epub.EpubHtml(title=new_ch_name,
                                     file_name=this_link,
                                     lang='en',
                                     uid=str(ctr))
        this_chapter.set_content(book_d[book][name][chapter_ct]['content'])
        toc.append(epub.Link(this_link, ch_name))
        spine.append(this_chapter)
        AFWD.add_item(this_chapter)

# Attach the table of contents and reading order collected above to the book.
AFWD.toc = toc
AFWD.spine = spine
# Register the stylesheet item and the navigation document.
# NOTE(review): no epub.EpubNcx() is added in the visible code — confirm
# whether an NCX file is needed for the target readers.
AFWD.add_item(AFFC_css)
AFWD.add_item(epub.EpubNav())

# Write the assembled book to disk.
epub.write_epub('A Feast with Dragons.epub', AFWD)
Exemplo n.º 11
0
        SM = SheetMusic(infile)
        random_filename = str(uuid.uuid4())
        sheet = {'tabs':SM.tabs,\
        'title':SM.title,\
        'file':random_filename}
        sheets.append(sheet)
# Order the collected sheets alphabetically by title.
sheets.sort(key=lambda sheet: sheet['title'])

# see https://github.com/aerkalov/ebooklib for details
book = epub.EpubBook()

# Book metadata; the identifier is a fresh random UUID on every run.
book.set_identifier(str(uuid.uuid4()))
book.set_title('My Favourite Sheets')
book.set_language('en')
book.add_author('Your Name Here')

# Reading order starts with the navigation page.
book.spine = ['nav']

# One page per sheet, registered with the book and appended to the spine.
for sheet in sheets:
    page = epub.EpubHtml(title=sheet['title'],
                         file_name=sheet['file'],
                         lang='en')
    page.content = sheet['tabs']
    book.add_item(page)
    book.spine.append(page)

# add default NCX and Nav file
for nav_item in (epub.EpubNcx(), epub.EpubNav()):
    book.add_item(nav_item)

# write to the file
epub.write_epub('my_favourite_sheets.epub', book, {})
Exemplo n.º 12
0
    def _create_toc(self):
        """
        Build the table of contents and the spine for the exported book.

        Resets ``self.toc`` (an ordered mapping keyed by TOC item id) and
        ``self.spine``, records the url titles of the hold chapters, then
        walks the book version's TOC. Chapter entries are converted to
        EPUB pages, added to ``self.epub_book`` and to the spine; entries
        without a chapter become ``epub.Section`` items. Chapters whose
        content fails to parse are logged and skipped.

        :Args:
          - self (:class:`ExportBook`): current class instance
        """

        self.toc = OrderedDict()
        self.spine = ['nav']

        self.hold_chapters_urls = [
            held.url_title for held in self.book_version.get_hold_chapters()
        ]

        for toc_item in self.book_version.get_toc():
            # Plain section entry: no page is generated for it.
            if not toc_item.chapter:
                section = epub.Section(toc_item.name)

                if toc_item.parent:
                    self.toc[toc_item.parent.id][1].append(section)
                else:
                    self.toc[toc_item.id] = [section, []]
                continue

            page_name = '%s.xhtml' % (toc_item.chapter.url_title, )
            page = epub.EpubHtml(title=toc_item.chapter.title,
                                 file_name=page_name)

            # hook for some extra customizations
            raw_content = self._chapter_content_hook(toc_item.chapter.content)

            try:
                tree = parse_html_string(raw_content.encode('utf-8'))
            except Exception as err:
                logger.error('Error parsing chapter content %s' % err)
                continue

            # hook for some extra customizations
            self._chapter_tree_hook(tree)

            for node in tree.iter():
                self._handle_chapter_element(node)

            page.content = etree.tostring(tree,
                                          pretty_print=True,
                                          encoding='utf-8',
                                          xml_declaration=True)

            # hook for some extra customizations
            self._epub_chapter_hook(page)

            self.epub_book.add_item(page)
            self.spine.append(page)

            # Nested chapters go into their parent's child list; top-level
            # ones get a [page, children] pair only if they have children.
            if toc_item.parent:
                self.toc[toc_item.parent.id][1].append(page)
            elif toc_item.has_children():
                self.toc[toc_item.id] = [page, []]
            else:
                self.toc[toc_item.id] = page
Exemplo n.º 13
0
def get_book(initial_url):
    """Scrape a Wattpad story and write it to an EPUB in the working directory.

    Author, title, description, cover URL and tags are read from the story
    page at ``initial_url``; the chapter list is read from
    ``<initial_url>/parts``. The book is written to
    ``"<title> - <author>.epub"``. When that file already exists no
    chapters are fetched and nothing is written.

    Args:
        initial_url: URL of the story's landing page.
    """
    base_url = 'http://www.wattpad.com'
    html = get_html(initial_url)

    # Get basic book information
    author = html.select('div.author-info strong a')[0].get_text()
    title = html.select('h1')[0].get_text().strip()
    description = html.select('h2.description')[0].get_text()
    coverurl = html.select('div.cover.cover-lg img')[0]['src']
    labels = ['Wattpad']
    labels.extend(label.get_text()
                  for label in html.select('div.tags a')
                  if '/' in label['href'])
    if debug:
        print("Author: " + author)
        print("Title: " + title)
        print("Description: " + description)
        print("Cover: " + coverurl)
        print("Labels:" + " ".join(labels))

    print("'{}' by {}".format(title, author))

    # Get list of chapters
    chapterlist_url = "{}{}".format(initial_url, "/parts")
    chapterlist = get_html(chapterlist_url).select('ul.table-of-contents a')

    epubfile = "{} - {}.epub".format(title, author)
    if os.path.exists(epubfile):
        print("Epub file already exists, not updating")
        return

    book = epub.EpubBook()
    book.set_identifier("wattpad.com//%s/%s" % (initial_url.split('/')[-1],
                                                len(chapterlist)))
    book.set_title(title)
    book.add_author(author)
    book.set_language('en')
    for label in labels:
        book.add_metadata('DC', 'subject', label)

    # Add a cover if it's available. ``cover`` must be bound either way:
    # it used to be assigned only on success, so building the spine raised
    # UnboundLocalError whenever the cover could not be fetched.
    cover = False
    if get_cover(coverurl):
        cover = True
        with open('cover.jpg', 'rb') as cover_file:
            book.set_cover(file_name='cover.jpg',
                           content=cover_file.read(),
                           create_page=True)
        os.remove('cover.jpg')

    # Define CSS style (files are closed as soon as they are read)
    with open("CSS/nav.css") as css_file:
        nav_css = epub.EpubItem(uid="style_nav", file_name="Style/nav.css",
                                media_type="text/css",
                                content=css_file.read())
    with open("CSS/body.css") as css_file:
        body_css = epub.EpubItem(uid="style_body", file_name="Style/body.css",
                                 media_type="text/css",
                                 content=css_file.read())
    # Add CSS file
    book.add_item(nav_css)
    book.add_item(body_css)

    # Introduction page, rendered from the HTML template
    intro_ch = epub.EpubHtml(title='Introduction', file_name='intro.xhtml')
    intro_ch.add_item(body_css)
    with open("HTML/intro.xhtml") as template_file:
        intro_template = Template(template_file.read())
    intro_ch.content = intro_template.substitute(title=title, author=author,
                                                 url=initial_url,
                                                 synopsis=description)
    book.add_item(intro_ch)

    allchapters = []
    for item in chapterlist:
        chaptertitle = item.get_text().strip().replace("/", "-")
        # Entries titled "A/N" (normalised to "A-N" above) are skipped.
        if chaptertitle.upper() == "A-N":
            continue
        print("Working on: {}".format(chaptertitle))
        chapter = get_chapter("{}{}".format(base_url, item['href']))
        book.add_item(chapter)
        allchapters.append(chapter)

    # Define Table of Contents
    book.toc = (epub.Link('intro.xhtml', 'Introduction', 'intro'),
                (epub.Section('Chapters'), allchapters))

    # Add default NCX and Nav file
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # Basic spine: optional cover, introduction, navigation, chapters
    myspine = []
    if cover:
        myspine.append('cover')
    myspine.extend([intro_ch, 'nav'])
    myspine.extend(allchapters)
    book.spine = myspine

    # Write the epub to file
    epub.write_epub(epubfile, book, {})
Exemplo n.º 14
0
def wxw():
    """Scrape a series index page and write one EPUB file per book.

    Command-line options:
      -u/--url    path appended to ``BASE_LINK`` to form the index URL.
      -b/--books  book numbers to build; every book found is built when
                  omitted.

    Books are discovered from the ``<strong>`` headings inside the index
    page's article body, chapters from the links in that same body. Each
    chapter page is downloaded, the paragraphs after its first ``<hr>`` are
    kept, and the result is written to ``<alphanumeric title>.epub`` in the
    current directory.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('-u', '--url', default='desolate-era-index')
    ap.add_argument('-b', '--books', nargs='+', default=None)
    args = ap.parse_args()

    index_link = BASE_LINK + args.url
    index_req = Request(index_link, headers={'User-Agent': 'Mozilla/5.0'})
    index_soup = BeautifulSoup(urlopen(index_req).read(), 'html5lib')

    series_title = re.search(r'([^:–()])*\w', index_soup.find('h1', attrs={'class': 'entry-title'}).get_text()).group()

    # Materialise the links as a list (the bare generator form is a syntax error).
    raw_chapter_links = [a['href'] for a in index_soup.select('div[itemprop=articleBody] a[href]')]
    books = {}
    chapters = {}

    book_titles = index_soup.find('div', attrs={'itemprop': 'articleBody'}).find_all('strong')
    for book in book_titles:
        # Headings look like "<word> <number> ..."; anything else is not a book.
        book_number = re.search(r'^\w*\s\d+', book.get_text())
        if book_number is None:
            continue
        book_number = re.search(r'\d+', book_number.group()).group()
        if args.books is not None and book_number not in args.books:
            continue
        books[book_number] = epub.EpubBook()
        books[book_number].set_title('{} – {}'.format(series_title, book.get_text()))
        books[book_number].set_identifier(uuid.uuid4().hex)
        books[book_number].set_language('en')
        chapters[book_number] = []

    for raw_chapter_link in raw_chapter_links:
        # Chapter links carry "<slug>-<book number>"; the number selects the book.
        info = re.search(r'\w*-\d+', raw_chapter_link)
        if info is None:
            continue
        book_number = re.search(r'\d+', info.group()).group()
        if book_number not in books:
            continue

        chapter_req = Request(raw_chapter_link, headers={'User-Agent': 'Mozilla/5.0'})
        chapter_soup = BeautifulSoup(urlopen(chapter_req).read(), 'html5lib')
        raw_chapter = chapter_soup.find('div', attrs={'itemprop': 'articleBody'})
        if raw_chapter is None:
            print('No article body found, skipping', raw_chapter_link)
            continue

        # Keep only the paragraphs between the first and second <hr>.
        parsed_chapter = []
        hr = 0
        for line in raw_chapter:
            if line.name == 'hr':
                hr += 1
            elif hr == 1 and line.name == 'p':
                parsed_chapter.append(line.get_text())

        if not parsed_chapter:
            print('No chapter text found, skipping', raw_chapter_link)
            continue

        # The first paragraph holds the title; fall back to it verbatim when
        # the pattern does not match.
        title_match = re.search(r'\w([^–:])*$', parsed_chapter[0])
        chapter_title = title_match.group() if title_match else parsed_chapter[0]
        chapter = epub.EpubHtml(
            title=chapter_title,
            file_name='{}.xhtml'.format(uuid.uuid4().hex),
            lang='en'
        )
        # Chapter Title
        parsed_chapter[0] = '<h1>{}</h1>'.format(chapter_title)
        chapter.content = '<br /><br />'.join(parsed_chapter)

        books[book_number].add_item(chapter)
        books[book_number].toc += (epub.Link(chapter.file_name, chapter.title, uuid.uuid4().hex), )
        chapters[book_number].append(chapter)
        # Pause between chapter downloads.
        time.sleep(1)
        print('Finished parsing', raw_chapter_link)

    for book_number, book in books.items():
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())
        # The spine refers to the navigation document by its item id, which
        # is lowercase 'nav' for a default EpubNav.
        book.spine = ['nav'] + chapters[book_number]

        # Stylesheet stored in the book as style/nav.css.
        style = 'BODY {color: white;}'
        nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)
        book.add_item(nav_css)

        epub.write_epub('{}.epub'.format(''.join(c for c in book.title if c.isalnum())), book, {})

# Script entry point: run the scraper only when this file is executed directly.
if __name__ == '__main__':
    wxw()
 </head>
 
 <body>"""

    b = """</div></body>
 </html>"""

    # making a name for the xhtml files
    title = BeautifulSoup(head, 'html.parser')
    title = str(title.get_text())
    title = re.sub(r'[^A-Za-z0-9 ]+', '', title)
    content = a + head + para + b

    #writing in xhtml
    content = content.encode(encoding='UTF-8', errors='ignore')
    c = epub.EpubHtml(title=title, file_name=title + '.xhtml', lang='en')
    c.set_content(content)
    book.add_item(c)

    #for TOC
    table.append(c)

    #adding to spine important
    book.spine.append(c)

# adding TOC
book.toc = (epub.Link('nav.xhtml', titleOfBook,
                      'nav'), (epub.Section('book'), (table)))

#Make Epub
book.add_item(epub.EpubNcx())
book.add_author('Impey, Chris')
book.spine = ['nav']  # Chapters will be appended later
book.set_cover('cover.jpg', open('cover.jpg', 'rb').read())

# Collect all chapters and sections in this list
book_toc = []

# Loop through all chapters
for chapter_index, chapter in contents.iterrows():
    chapter_title = '{}. {}'.format(chapter_index + 1, chapter['humanchapter'])
    chapter_file_name = 'chap_{0:02d}.xhtml'.format(chapter_index + 1)

    print('{}'.format(chapter_title))

    # Set up an EPUB chapter, ...
    epub_chapter = epub.EpubHtml(title=chapter_title,
                                 file_name=chapter_file_name)

    # ... add it to the table of contents
    book_toc.append(
        epub.Link(href=chapter_file_name,
                  title=chapter['humanchapter'],
                  uid=chapter_file_name.split('.')[0]))

    # ... and compile the HTML contents for it
    doc, tag, text = Doc().tagtext()

    with tag('h1'):
        text(chapter_title)

    for section_index, section in chapter['sections'].iterrows():
        section_title = '{}.{} {}'.format(chapter_index + 1, section_index + 1,
Exemplo n.º 17
0
def worker(book, number, chapters, link):
    """Fetch chapter ``number`` and store it in ``chapters`` as an EPUB page.

    The chapter JSON is requested from ``link`` with ``number`` appended.
    The resulting page is stored in ``chapters`` under the JSON's
    'chapterId'. ``book`` is accepted but not used here. Returns ``None``.
    """
    payload = reqJson(f"{link}{number}")
    page = epub.EpubHtml(
        title=payload['title'],
        file_name=f'chap_{number}.xhtml',
        lang='hr',
    )
    page.content = payload['content']
    chapters[payload['chapterId']] = page
Exemplo n.º 18
0
    def write_epub(self, save_dir: Path = Path(SAVE_DIR), style: str = DEFAULT_CSS, use_cache: bool = USE_CACHE,
                   language: str = LANGUAGE, cache_dir: Path = Path(CACHE_DIR), add_copyright_page=True,
                   end_update=True, ):
        """Write the collected chapters out as an EPUB file in ``save_dir``.

        Nothing is written when the book has fewer than ``MIN_CHAPTERS``
        chapters or is missing too many chapters; such books are appended to
        ``black_list_log`` and the method returns early.

        :param save_dir: directory the finished EPUB is moved into (created if absent).
        :param style: CSS text stored in the book as ``style/nav.css``.
        :param use_cache: forwarded to ``_dump``.
        :param language: book language; selects the OpenCC conversion
            ('t2s' when in ``_simplified``, otherwise 's2t').
        :param cache_dir: forwarded to ``_dump``.
        :param add_copyright_page: append the ``_copyright_page`` chapter when true.
        :param end_update: dump ``self.book_data`` through ``_dump`` after writing.
        """
        chapter_count = len(self.content)
        missing_count = len(self.missing_chapters)
        listed_title = _default_cc.convert(self.book_data.title)

        # Books that are too short are blacklisted and skipped.
        if chapter_count < MIN_CHAPTERS:
            black_list_log.append('"%s",  # %s\n' % (self.book_data.url, listed_title))
            if LANGUAGE in _simplified:
                logger.debug('《%s》过短。' % listed_title)
            else:
                logger.debug('《%s》過短。' % listed_title)
            return

        # Books with too many missing chapters are blacklisted and skipped.
        too_many_missing = missing_count >= 5 or (chapter_count >= 200 and missing_count >= 10)
        if too_many_missing:
            black_list_log.append('"%s",  # %s\n' % (self.book_data.url, listed_title))
            if LANGUAGE in _simplified:
                logger.debug('《%s》一书缺失章节过多,达 %d 章。' % (listed_title, missing_count))
            else:
                logger.debug('《%s》一書缺失章節過多,達 %d 章。' % (listed_title, missing_count))
            logger.error('"%s",  # %s\n' % (self.book_data.url, listed_title))
            return

        # A few missing chapters are tolerated but recorded.
        if self.missing_chapters:
            missing_lines = ['   - 第 %d 章《%s》- %s\n' % (gap.number, gap.title, gap.url)
                             for gap in self.missing_chapters]
            missing_log.append('《%s》\n' % listed_title)
            missing_log.extend(missing_lines)
            if LANGUAGE in _simplified:
                logger.warning('《%s》一书缺 %d 章。' % (listed_title, missing_count))
            else:
                logger.warning('《%s》一書缺 %d 章。' % (listed_title, missing_count))

        book = epub.EpubBook()
        converter = OpenCC('t2s' if language in _simplified else 's2t')

        def html_items():
            # Exact type match: instances of EpubHtml subclasses are left out.
            return [entry for entry in book.items if type(entry) is epub.EpubHtml]

        # Book metadata
        book_data = self.book_data
        book.set_identifier(_gen_identifier_from_url(book_data.url))
        title = converter.convert(self.book_data.title)
        book.set_title(title)
        book.set_language(language)
        book.add_author(book_data.author)

        # "About this book" page
        detail = '\n'.join('<p>%s</p>' % converter.convert(para) for para in self.book_data.detail)
        describe = '\n'.join('<p>%s</p>' % converter.convert(para) for para in self.book_data.describe)
        about_body = '<p><h1>%s</h1></p>%s<p><h3>介绍</h3></p>%s' % (title, detail, describe)
        book.add_item(epub.EpubHtml(title=converter.convert('关于本书'), file_name='about.xhtml',
                                    lang=language, content=about_body))

        # One page per chapter, numbered from 0001
        for index, chapter in enumerate(self.content, start=1):
            book.add_item(epub.EpubHtml(title=converter.convert(chapter.title),
                                        file_name='%04d' % index + '.xhtml',
                                        lang=language,
                                        content=converter.convert(chapter.as_html())))

        if add_copyright_page:
            book.add_item(epub.EpubHtml(title=converter.convert('关于著作权'), file_name='copyright.xhtml',
                                        lang=language,
                                        content=converter.convert(_copyright_page.as_html())))

        # Table of contents: every page added so far
        book.toc = html_items()
        # NCX and Nav documents
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())
        # CSS stylesheet
        book.add_item(epub.EpubItem(uid="style_nav", file_name="style/nav.css",
                                    media_type="text/css", content=style))
        # Spine
        book.spine = ['cover', 'nav'] + html_items()

        # Write to a fixed temporary name first, then move to the final path
        if not save_dir.exists():
            save_dir.mkdir(parents=True)
        final_name = '%s - 至第 %d 章.epub' % (title, chapter_count)
        save_path = save_dir.cwd() / save_dir / final_name
        epub.write_epub('writing.epub', book, {})
        shutil.move('./writing.epub', str(save_path))
        if LANGUAGE in _simplified:
            logger.debug('已生成《%s》一书。' % title)
        else:
            logger.debug('已生成《%s》一書。' % title)

        # Refresh the cached book information
        if end_update:
            _dump(identifier=_gen_identifier_from_url(book_data.url), some_obj=self.book_data,
                  cache_dir=cache_dir, use_cache=use_cache)
Exemplo n.º 19
0
                                         encoding='utf-8')


# Sample script: builds a two-chapter book in memory when run directly.
if __name__ == '__main__':
    book = epub.EpubBook()

    # add metadata
    book.set_identifier('sample123456')
    book.set_title('Sample book')
    book.set_language('en')

    book.add_author('Aleksandar Erkalovic')

    # intro chapter (content given as a complete HTML document)
    c1 = epub.EpubHtml(title='Introduction',
                       file_name='intro.xhtml',
                       lang='en')
    c1.content = u'<html><head></head><body><h1>Introduction</h1><p>Introduction paragraph <a class="test">with a link</a> where i explain what is happening.</p></body></html>'

    # about chapter (content given as a bare HTML fragment, no lang set)
    c2 = epub.EpubHtml(title='About this book', file_name='about.xhtml')
    c2.content = '<h1>About this book</h1><p>Helou, this is my book! There are many books, but this one is mine.</p>'

    # add chapters to the book
    book.add_item(c1)
    book.add_item(c2)

    # create table of contents
    # - add section
    # - add auto created links to chapters
Exemplo n.º 20
0
def create_epub(chapters: Mapping,
                title: str,
                basename: str,
                use_dnd_decorations: bool = False):
    """Render a mapping of HTML chapters into "{basename}.epub".

    Parameters
    ==========
    chapters
      A mapping whose keys are chapter names and whose values are the
      HTML strings used as each chapter's contents. Chapters are added
      in the mapping's iteration order.
    title
      The title stored in the book's metadata.
    basename
      The basename for the output; the book is written to
      "{basename}.epub".
    use_dnd_decorations
      Passed to the CSS template; if true, the style sheet includes
      the D&D stylized stat blocks, etc.

    """
    ebook = epub.EpubBook()
    ebook.set_identifier("id123456")
    ebook.set_title(title)
    ebook.set_language("en")

    # Style sheet, rendered from the template with the dl list widths
    # (in 'em' units) for each section kind
    widths_em = {
        "character-details": 11,
        "combat-stats": 15,
        "proficiencies": 8.5,
        "faction": 6,
        "spellcasting": 12.5,
        "spell-slots": 8,
        "spell-details": 10,
        "beast-stats": 9,
        "feature-details": 5.5,
        "infusion-details": 8.5,
        "magic-item-details": 13.5,
        "monster-details": 15,
    }
    stylesheet_text = jinja_env.get_template("dungeonsheets_epub.css").render(
        use_dnd_decorations=use_dnd_decorations, dl_widths=widths_em)
    stylesheet = epub.EpubItem(
        uid="style_default",
        file_name="style/gm_sheet.css",
        media_type="text/css",
        content=stylesheet_text,
    )
    ebook.add_item(stylesheet)

    # Paper background image, shipped next to this module
    background_bytes = (Path(__file__).parent / "forms/paper-low-res.jpg").read_bytes()
    ebook.add_item(
        epub.EpubItem(
            file_name="images/paper.jpg",
            media_type="image/jpeg",
            content=background_bytes,
        ))

    # One EPUB page per chapter, plus its table-of-contents entry
    contents_entries = ["nav"]
    pages = []
    for chapter_name, chapter_html in chapters.items():
        slug = chapter_name.replace(" - ", "-").replace(" ", "_").lower()
        page_file = f"{slug}.html"
        page = epub.EpubHtml(
            title=chapter_name,
            file_name=page_file,
            lang="en",
            media_type="application/xhtml+xml",
        )
        page.set_content(chapter_html)
        page.add_item(stylesheet)
        ebook.add_item(page)
        pages.append(page)
        contents_entries.append(
            toc_from_headings(html=chapter_html,
                              filename=page_file,
                              chapter_title=chapter_name))

    ebook.toc = contents_entries
    ebook.spine = tuple(["nav"] + pages)
    # Default NCX and Nav documents
    ebook.add_item(epub.EpubNcx())
    ebook.add_item(epub.EpubNav())
    # Save the file
    epub.write_epub(f"{basename}.epub", ebook)
Exemplo n.º 21
0
from ebooklib import epub
import time

# Introduction to the mobi format: https://www.cnblogs.com/buptzym/p/5249662.html

# HTTP session whose headers are replaced with a desktop browser User-Agent.
s = requests.Session()
s.headers = {
    "User-Agent":
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
}

# Table-of-contents entries and the book they belong to.
epubTOC = []
book = epub.EpubBook()

# add copyright description (the last paragraph is closed with </p>, not a second <p>)
copyright = epub.EpubHtml(title="版权声明", file_name="copyright.html")
copyright.content = """<h1>版权声明</h1>
<p>本工具目的是将免费网络在线小说转换成方便kindle用户阅读的mobi电子书, 作品版权归原作者或网站所有, 请不要将该工具用于非法用途。</p>
<p>GitHub: https://github.com/fondoger/qidian2mobi</p>
"""
book.add_item(copyright)
epubTOC.append(epub.Link("copyright.html", "版权声明", "intro"))


def handle_url(url):
    """Return ``url`` with "http:" prepended when it starts with "//".

    Any other URL is returned unchanged.
    """
    is_protocol_relative = url[:2] == '//'
    return "http:" + url if is_protocol_relative else url

Exemplo n.º 22
0
    def build(self):
        '''Build the issue, downloading articles if needed, and write the ebook.

        Fetches the issue, builds every section, then assembles an EPUB with
        an optional cover, a navigation page and one page per downloaded
        article, grouped by section. The book is written to "<self.id>.epub".
        Articles without content are logged and skipped, and a section left
        with no articles is omitted from the table of contents.
        '''

        self.fetch_issue()
        self.info()

        for s in self.sections:
            s.build(self.db)

        book = epub.EpubBook()

        # add metadata
        book.set_title(self.title)
        book.set_identifier(self.id)
        book.set_language(self.language)
        book.add_author(self.author)

        toc = []
        spine = []

        if self.cover_img:
            img = fetch(self.cover_img).content
            book.set_cover("image.jpg", img)
            spine.append('cover')

        spine.append('nav')

        # Sections
        for section in self.sections:
            items = []

            for article in section.articles:
                if not article.content:
                    logging.error('%s could not be downloaded. Skipping.',
                                  article.url)
                    continue
                item = epub.EpubHtml(title=article.title,
                                     file_name='{}.xhtml'.format(
                                         article.title),
                                     lang=self.language)
                item.content = article.content

                # images were downloaded by the article, and placed
                # on disk for reference. We now add them to the book.
                for filename in article.images:
                    img = epub.EpubImage()
                    img.file_name = filename
                    with open(filename, 'rb') as f:
                        img.content = f.read()
                    book.add_item(img)
                items.append(item)

            if not items:
                # The section link below points at the first article, so a
                # section with nothing downloaded cannot be listed.
                logging.warning('Section %s has no articles. Skipping.',
                                section.title)
                continue

            for item in items:
                book.add_item(item)
            toc.append((epub.Section(section.title,
                                     href=items[0].file_name), items))
            spine.extend(items)

        book.toc = toc
        book.spine = spine

        # add navigation files
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())

        # create epub file
        epub.write_epub('{}.epub'.format(self.id), book, {})
Exemplo n.º 23
0
def getDataForEbook(url):
    """
    Download the book whose index page is at ``url`` and write it as an EPUB.

    For now the url must be the index of an O'Reilly internet ebook. The
    index page is fetched and parsed for the title, the authors and the
    table of contents; each chapter linked from the table of contents is
    then read from the local download folder when a file of that name is
    already there, or downloaded otherwise, and everything is wrapped into
    a single epub file at DOWNLOADS_PATH/<title>.epub.

    Written for Python 2 (print statements, ``unicode``).

    Local variables:

    chapters  list whose first element is an empty string, followed by one
              EpubHtml item per chapter; used as both the table of
              contents and the spine

    authors   list of author names read from the index page

    links     list of chapter hrefs taken from the table of contents

    book      dict holding the "Title", "Authors" and "TOC" metadata

    book_slug slugified form of the simplified url

    book_download_path
              folder under DOWNLOADS_PATH used for this book's files

    eBook     the ebooklib EpubBook being assembled
    """
    # creation of the variables necessary to create the ebook
    # NOTE(review): the leading '' ends up as the first toc and spine entry -
    # confirm that this is intended.
    chapters = ['']
    authors = []
    links = []
    book = {}

    # first it will drop "http[s]://" and "index.html", if present:
    simplified_url = url.split('://')[-1].split('index.html')[0]
    if VERBOSE:
        print 'simplified url:', simplified_url
    # then we will create the book folder... turns out it has to be unicode, so we fix that here
    book_slug = slugify(unicode(simplified_url, "utf-8"))
    book_download_path = os.path.join(DOWNLOADS_PATH, book_slug)
    # in case the book folder is not present, it will create one.
    if not os.path.isdir(book_download_path):
        os.mkdir(book_download_path)
        if VERBOSE:
            print 'CREATING book_download_path ({})'.format(book_download_path)

    # Creating eBook creator
    eBook = epub.EpubBook()
    # Capturing the url to run BS4 on it
    resp = get_page(url)
    soup = BeautifulSoup(resp, "lxml", from_encoding="UTF-8")

    # url_root is the root of the book, where you find the table of contents (the link for all the chapters)
    # str.index raises ValueError when "index" does not occur in the url.
    url_root = url[:url.index("index")]
    # now we need to find the title of the book, usually it is an h1 with class "title"
    book["Title"] = soup.find('h1', class_="title").getText()
    # capture the authors of the book and collect them in a list for the metadata
    for author in soup.find_all("h3", class_="author"):
        authors.append(author.getText())
    # this is the metadata
    book["Authors"] = authors
    # load the whole section "table of contents" (toc) into the container
    book["TOC"] = str(soup.find('div', class_="toc"))

    # creates the TOC.html of the book
    with open(os.path.join(book_download_path, "TOC.html"), "w") as text_file:
        text_file.write("<!-- " + book["Title"] + " -->\n")
        text_file.write(book["TOC"])

    # to select the chapters it will look inside the TOC for links to chapters;
    # links containing "#" are skipped and only hrefs containing 'pr' or 'ch'
    # (the O'Reilly chapter names) are kept. An href containing both is
    # appended twice.
    for link in soup.find('div', class_="toc").find_all('a', href=True):
        if "#" not in link['href']:
            if 'pr' in link['href']:
                links.append(link['href'])

            if 'ch' in link['href']:
                links.append(link['href'])

    # setup the metadata (the title doubles as the identifier)
    eBook.set_identifier(book["Title"])
    eBook.set_title(book["Title"])
    eBook.set_language(LANGUAGE)
    # adding the authors into ebook metadata
    for author in book["Authors"]:
        eBook.add_author(author)

    # look for the files inside the book downloaded path
    f_ = os.listdir(book_download_path)
    # and then go through the links, using the local file when present and downloading otherwise.
    for link in links:
        if link in f_:
            print "Local file found:", link
            with open(os.path.join(book_download_path, link),
                      "r") as text_file:
                resp = text_file.read()
        else:
            print "Downloading file:", link
            resp = get_page(url_root + link)

        soup = BeautifulSoup(resp, "lxml", from_encoding="UTF-8")

        # The chapter title is taken from an h1 with class "title"; when an
        # AttributeError is raised inside the try block, the chapter is
        # built again with the title taken from an h2 instead.
        try:
            c = epub.EpubHtml(title=soup.find('h1', class_="title").getText(),
                              file_name=link,
                              lang='en')
            c.content = createChapter(url_root, link, book_download_path, resp)
            chapters.append(c)
            eBook.add_item(c)
        except AttributeError:
            c = epub.EpubHtml(title=soup.find('h2', class_="title").getText(),
                              file_name=link,
                              lang='en')
            c.content = createChapter(url_root, link, book_download_path, resp)
            chapters.append(c)
            eBook.add_item(c)

    eBook.toc = chapters

    eBook.add_item(epub.EpubNcx())
    eBook.add_item(epub.EpubNav())

    # define css style (read from STYLE_PATH/STYLE)
    style = ""
    with open(os.path.join(STYLE_PATH, STYLE), "r") as text_file:
        style = text_file.read()

    if VERBOSE:
        print "Applying style", STYLE
    # add css file
    nav_css = epub.EpubItem(uid="style_nav",
                            file_name="style/nav.css",
                            media_type="text/css",
                            content=style)
    eBook.add_item(nav_css)

    # create spine
    eBook.spine = chapters
    # despite its name, time_elapsed holds the start time of the write step
    time_elapsed = time.time()
    if VERBOSE:
        print "Starting book creation..."
    # create epub file
    epub.write_epub(os.path.join(DOWNLOADS_PATH, book["Title"] + '.epub'),
                    eBook, {})
    print "Done,", os.path.join(DOWNLOADS_PATH,
                                book["Title"] + '.epub'), "created!"
    print "Time elapsed", time.time() - time_elapsed
Exemplo n.º 24
0
    def process_message(self, message_json: str) -> bool:
        """Bind a user's saved articles into an EPUB and queue the next step.

        The message names a user; that user's articles are fetched, each
        becomes a chapter (with its related content attached as extra book
        items), the book is rendered to a temporary local file, uploaded
        through the file repository and recorded in the ebook repository.
        Finally a convert or deliver message is sent, depending on
        ``user.prefer_kindle``.

        :param message_json: JSON text of a ``BindEBookMessage``.
        :return: ``True`` when the message was handled (including the case
            of a user with no articles), ``False`` on any failure.
        """
        self.logger.debug(f"processing message {message_json}")

        # parse the message
        bind_ebook_msg = BindEBookMessage.from_json(message_json)

        # fetch the user record
        user = self.user_repository.get(bind_ebook_msg.user_id)

        if user is None:
            self.logger.error(
                f"couldn't fetch user with id {bind_ebook_msg.user_id}")
            return False

        # fetch the articles for the user
        articles = self.article_repository.get_all(user.user_id)

        if len(articles) < 1:
            # we want to exit early, but this is not an exceptional case, so we'll return True
            # so that the consumer deletes the message
            self.logger.info(
                f"no articles ready for binding for user {user.user_id}")
            return True

        # create the ebook model
        ebook_model = EBook(user.user_id)

        # create an ebooklib ebook
        ebook = epub.EpubBook()

        chapters = []
        related_items = []

        # for each article:
        for i, article in enumerate(articles):
            # fetch the content from S3
            article_content = self.file_repository.get(article.content_key)

            if article_content is None:
                self.logger.error(
                    f"couldn't fetch the saved content for article {article.article_id}"
                )
                continue

            # add the article ID to the ebook model
            ebook_model.article_ids.append(article.article_id)

            # create an ebooklib chapter
            chapter = epub.EpubHtml(title=article.title,
                                    file_name=f"chapter_{i}.xhtml",
                                    lang="en")

            # add the content to the chapter
            chapter.set_content(article_content.read())

            # for each related content:
            for j, related_content in enumerate(article.related_content):
                # fetch the related content from S3
                item_content = self.file_repository.get(
                    related_content.content_key)

                if item_content is None:
                    self.logger.error(
                        f"couldn't fetch the saved related content for article {article.article_id} and related content {related_content.content_key}"
                    )
                    continue

                # create the ebooklib item; the uid combines the article and
                # item indexes so that several items of one article do not
                # share an id
                related_item = epub.EpubItem(
                    uid=f"related_item{i}_{j}",
                    file_name=related_content.content_key,
                    media_type=related_content.mime_type,
                    content=item_content.read(),
                )

                related_items.append(related_item)

            chapters.append(chapter)

        # add the chapters to the ebook
        for chapter in chapters:
            ebook.add_item(chapter)

        # add the images to the ebook as linked content
        for item in related_items:
            ebook.add_item(item)

        # add ebook metadata
        ebook.set_identifier("")  # TODO: Find a value for this
        ebook.set_title("")  # TODO: Find a value for this
        ebook.add_author("")  # TODO: Find a value for this
        ebook.set_language("en")

        # create the ebook nav structure
        ebook.spine = chapters
        ebook.toc = chapters

        epub_path = Path.cwd() / f"{ebook_model.ebook_id}.epub"

        try:
            # render the ebook and write it to a local file
            epub.write_epub(str(epub_path), ebook)

            content_key = f"{ebook_model.user_id}/books/{ebook_model.ebook_id}.epub"

            # read the local file into a bytestream
            with epub_path.open(mode="rb") as f:
                # write the bytestream to S3 and update the content_key on the ebook model
                if not self.file_repository.put(content_key, f):
                    # no exception is active here, so log a plain error
                    self.logger.error("unable to push ebook content to S3")
                    return False
        except Exception:
            self.logger.exception("unable to write ebook to local file store")
            return False
        finally:
            # remove the temporary ePub file; it does not exist when rendering
            # failed, and that must not replace the False result with an error
            try:
                epub_path.unlink()
            except FileNotFoundError:
                pass

        ebook_model.content_key = content_key

        # write the ebook model to Dynamo
        if not self.ebook_repository.put(ebook_model):
            self.logger.error(
                f"unable to write ebook record to Dynamo for user {user.user_id}"
            )
            return False

        if user.prefer_kindle:
            self.converter_queue_producer.send_message(
                ConvertEBookMessage(ebook_model.user_id,
                                    ebook_model.ebook_id).to_json())
        else:
            self.postmaster_queue_producer.send_message(
                DeliverEBookMessage(ebook_model.user_id,
                                    ebook_model.ebook_id).to_json())

        return True
Exemplo n.º 25
0
except:
    # exception is also thrown if directory already exists
    print('Something happened while creating pic cache folder. Not necessary a problem.')

# Summary of the blog about to be scraped: name, post count and page count.
print('** {} **\n\n{} posts\n{} pages'.format(info['name'], info['posts'], info['pages']))


# start creating book
book = epub.EpubBook()
book.set_title(info['title'])
# the blog title is registered as an author, alongside the tool's own name
book.add_author(info['title'])
book.add_author('Tumblr2book')
book.set_language('en')

# general info chapter: title, link, description, post count, last update,
# scrape time, followed by the di_warning text
introchapter = epub.EpubHtml(file_name='intro.xhtml')
introchapter.content = '''
<h1> {} </h1>
<p> <a href="{}"> {} </a> </p>
<p> {} </p>
<p> {} posts </p>
<p> Blog last updated {} </p>
<p> Scraped {} </p>
{}
'''.format(info['title'], info['url'], info['url'], info['description'], info['posts'], info['updated'], time.ctime(), di_warning)
book.add_item(introchapter)

# introducing templates
template_names = [
    # parts of posts
    'header', 'picture', 'chatphrase',
Exemplo n.º 26
0
def posts_epub_link(posts):
    """Write ``test.epub`` with one chapter per post and return its link.

    Every post in ``posts`` gets its own chapter (titled ``post.title``,
    stored as "<post.slug>.xhtml"), and all chapters follow the navigation
    page in the spine. An empty ``posts`` produces a book that contains
    only the navigation files.

    Returns the string "/test.epub".
    """

    book = epub.EpubBook()

    # add metadata
    book.set_title('Articles de Vincent Jousse')
    book.set_language('fr')

    book.add_author('Vincent Jousse')

    # Each chapter is added inside the loop, so every post ends up in the book.
    chapters = []
    for post in posts:
        print(post.title)
        chapter = epub.EpubHtml(title=post.title, file_name='%s.xhtml' % post.slug, lang='fr')
        # NOTE(review): the body is a fixed placeholder, not the post's text.
        chapter.content = u'<html><head></head><body><h1>Introduction</h1><p>Voici une belle introduction.</p></body></html>'
        book.add_item(chapter)
        chapters.append(chapter)

    # add navigation files
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # define css style
    style = '''
@namespace epub "http://www.idpf.org/2007/ops";

body {
    font-family: Cambria, Liberation Serif, Bitstream Vera Serif, Georgia, Times, Times New Roman, serif;
}

h2 {
     text-align: left;
     text-transform: uppercase;
     font-weight: 200;     
}

ol {
        list-style-type: none;
}

ol > li:first-child {
        margin-top: 0.3em;
}


nav[epub|type~='toc'] > ol > li > ol  {
    list-style-type:square;
}


nav[epub|type~='toc'] > ol > li > ol > li {
        margin-top: 0.3em;
}

'''

    # add css file
    nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)
    book.add_item(nav_css)

    # create spine: navigation page first, then every chapter in order
    book.spine = ['nav'] + chapters

    # create epub file
    epub.write_epub('test.epub', book, {})


    return "/test.epub"
Exemplo n.º 27
0
def create_epub(work):
    """Write ``work`` to "<work.title>.epub" in the current directory.

    The book gets a title page followed by one page per chapter in
    ``work.chapters``. A chapter with an ``image_url`` has that image
    (downloaded when the url contains 'http', otherwise read from the
    local path) embedded above its text, and the image of chapter number 1
    is also used as the cover.

    NOTE(review): no spine is assigned before writing (the assignment is
    commented out below) - confirm this is intended.
    """

    book = epub.EpubBook()

    # set metadata
    book.set_identifier(str(work.id))
    book.set_title(work.title)
    book.set_language('en')
    book.add_metadata('DC', 'description', work.work_summary)

    book.add_author(work.user.username)

    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # Title page: summary as heading, then the author and the word count.
    title_page = epub.EpubHtml(title=work.title,
                               file_name='title_page.xhtml',
                               lang='en')
    content_string = '<center><h1>' + work.work_summary + '</h1><br/><h2>' + work.user.username + '</h2>' + '<br/>Word Count: ' + str(
        work.word_count) + '</center>'
    title_page.content = content_string.encode('utf8')
    book.add_item(title_page)
    book.toc.append(epub.Link('title_page.xhtml', 'Title Page', ''))

    for chapter in work.chapters:
        new_chapter = epub.EpubHtml(title=chapter.title,
                                    file_name=chapter.title + '.xhtml',
                                    lang='en')
        # The body is assembled in a local string rather than appended to
        # new_chapter.content, so it does not depend on the initial content
        # of a fresh EpubHtml being a string.
        body = ""
        if chapter.image_url:
            if 'http' in chapter.image_url:
                image = requests.get(chapter.image_url).content
            else:
                with open(chapter.image_url, 'rb') as image_file:
                    image = image_file.read()
            image_string = "chapter_" + str(chapter.number) + ".jpg"
            # One uid per chapter image, so the images do not all share "img_1".
            image_item = epub.EpubItem(uid="img_" + str(chapter.number),
                                       file_name=image_string,
                                       media_type="image/jpeg",
                                       content=image)
            book.add_item(image_item)
            if image is not None:
                new_chapter.add_item(image_item)
                if chapter.number == 1:
                    book.set_cover(image_string, image)
            body = "<img src='" + image_string + "'/>" + "<br/><br/><br/>"
        new_chapter.content = body + chapter.text
        book.add_item(new_chapter)
        # The chapter summary is passed as the link's third argument.
        book.toc.append(
            epub.Link(chapter.title + '.xhtml', chapter.title,
                      chapter.summary))

    # define CSS style
    style = 'BODY {color: white;}'
    nav_css = epub.EpubItem(uid="style_nav",
                            file_name="style/nav.css",
                            media_type="text/css",
                            content=style)

    # add CSS file
    book.add_item(nav_css)

    # basic spine
    #book.spine = ['nav', c1]

    # write to the file
    epub.write_epub(work.title + '.epub', book, {})
def scrape_books(book_links):
    """Scrape every linked book and save it as an EPUB in a per-category folder.

    Args:
        book_links: mapping of category name -> mapping of book title -> URL.
            One directory is created per category (relative to the current
            working directory) and one ``<title>.epub`` is written into it
            per book.

    The page is loaded through the module-level ``browser``; everything
    from the first ``'Home Page</a>'`` marker onwards is cut off before the
    page source is stored as the single chapter of the book.
    """
    # Stylesheet attached to every generated book. It is defined at function
    # scope so the per-book loop below can actually see it.
    style = '''
            body, table {
                margin-left: 100px;
                margin-right: 100px;
                font-size: 28px !important;
                font: 28px Times New Roman !important;
                font-weight: 400 !important;
            }
            '''

    def create_epub(identifier, title, author):
        """Return an empty English-language book carrying the given metadata."""
        eb = epub.EpubBook()
        eb.set_identifier(identifier)
        eb.set_title(title)
        eb.set_language('en')
        eb.add_author(author)
        return eb

    for category, values in book_links.items():
        os.makedirs(category, exist_ok=True)
        print(f"Scraping {category} category. . .")

        for book, link in tqdm(values.items()):
            browser.get(link)
            page_source = browser.page_source
            soup = BeautifulSoup(page_source, 'lxml')
            author = soup.find('center').text
            savename = re.sub(r'\W+', ' ', book)
            altsavename = re.sub(r'\W+', '', book)

            # Cut the page at the footer marker. str.find returns -1 when the
            # marker is missing, and slicing with -1 would silently drop the
            # last character, so keep the whole page in that case.
            cork = page_source.find('Home Page</a>')
            eb_content = page_source if cork == -1 else page_source[:cork]

            # Build the book: one chapter holding the trimmed page.
            eb = create_epub(savename, book, author)
            chapter = epub.EpubHtml(title=f"{book}",
                                    file_name='chap_01.xhtml',
                                    lang='en')  # match the book language
            chapter.content = eb_content
            eb.add_item(chapter)

            nav_css = epub.EpubItem(uid="style_nav",
                                    file_name="style/nav.css",
                                    media_type="text/css",
                                    content=style)
            eb.add_item(nav_css)
            eb.spine = ['nav', chapter]

            epub_path = os.path.join(category, f"{savename}.epub")
            epub.write_epub(epub_path, eb, {})

            # Best-effort PDF export: a failure here must not stop the scrape.
            # NOTE(review): `pdf` is not defined in this function; presumably
            # a module-level PDF object - confirm against the rest of the file.
            try:
                pdf.output(name=f'{savename}.pdf', dest='F')
                new_pdf_path = os.path.join(category, f'{savename}.pdf')
                shutil.move(src=f'{savename}.pdf', dst=new_pdf_path)
                # The EPUB above is written straight into the category
                # folder; only relocate a copy that exists in the working
                # directory.
                loose_epub = f'{savename}.epub'
                if os.path.exists(loose_epub):
                    new_epub_path = os.path.join(category,
                                                 f"{altsavename}.epub")
                    shutil.move(src=loose_epub, dst=new_epub_path)
            except Exception as exc:
                print(f"PDF export skipped for {savename}: {exc}")
Exemplo n.º 29
0
def gen_epub(  # pylint: disable=too-many-arguments, too-many-locals, too-many-statements, too-many-branches  # noqa: C901
    infile: str,
    outfile: Optional[str] = None,
    title: Optional[str] = None,
    start: int = 1,
    pairs: int = 1000,
    chapters: int = 10,
    debug: bool = False,
) -> Optional[str]:
    # fmt: on
    """Generate an EPUB of sentence pairs from a TMX file.

    Args:
        infile: path of the TMX file to read, e.g. ``r"tests\\2.tmx"``.
        outfile: path of the EPUB to write; defaults to ``infile`` with its
            extension replaced by ``.epub`` (same directory).
        title: book title; defaults to the file name of ``infile``.
        start: 1-based index of the first entry to include; values below 1
            are treated as 1.
        pairs: number of entries per chapter; negative values fall back
            to 1000.
        chapters: maximum number of chapters; negative values fall back
            to 1000.
        debug: log at level 10 instead of level 20.

    Returns:
        The path of the written EPUB file.

    Raises:
        SystemExit: if ``infile`` is not a file, or if the first entry
            cannot be read from it with ``xml_iter``.
    """
    logzero.loglevel(10 if debug else 20)

    if not Path(infile).is_file():
        logger.error(" [%s] is not a file or does not exist, exiting...",
                     infile)
        raise SystemExit(1)

    if outfile is None:
        src = Path(infile).absolute()
        outfile = str(src.parent / f"{src.stem}.epub")

    if title is None:
        title = Path(infile).name

    # Convert the 1-based start position into a count of entries to skip.
    start = max(start, 1) - 1
    if pairs < 0:
        pairs = 1000
    if chapters < 0:
        chapters = 1000

    # Probe the file once so an unreadable input fails early and clearly.
    try:
        next(xml_iter(infile))
    except Exception as exc:
        logger.error(" file [%s] maybe not a valid tmx file: %s", infile, exc)
        raise SystemExit(1) from exc

    xml_g = xml_iter(infile)

    # Skip the leading entries; show a progress bar only for long skips.
    # tqdm needs an iterable, so wrap the range rather than the bare int.
    skip_range = tqdm(range(start)) if start > 5000 else range(start)
    for _ in skip_range:
        next(xml_g)

    chp_cont = []
    conn = " "
    ct_ = []
    try:
        for _chapter in range(chapters):
            ct_ = []
            # Progress bar only for very large chapters.
            pair_range = trange(pairs) if pairs > 10000 else range(pairs)
            for _ in pair_range:
                el_ = next(xml_g)
                tuv = [pq(elm).html() for elm in pq(el_)("tuv")]
                # Indent the second tuv by 20px under the first.
                ct_.append(
                    tuv[0]
                    + f"""<div style="margin-left: 20px">{tuv[1]}</div>"""
                )
            chp_cont.append(conn.join(ct_))
    except StopIteration:
        # Input exhausted: keep whatever the last chapter collected.
        chp_cont.append(conn.join(ct_))
    except Exception as exc:
        logger.error("collecting sent pairs exc: %s", exc)

    # Count the chapters actually collected, so the loop below can never
    # index past the end of chp_cont (e.g. after a logged error above or
    # when chapters == 0).
    final_ch = len(chp_cont)

    if final_ch < chapters:
        logger.info(" Only able to collect **%s** chapters", final_ch)

    # Zero-pad width for chapter file names; max() guards math.log(0).
    digits = math.ceil(math.log(max(chapters, 1)) / math.log(10)) + 1

    # refer to https://pypi.org/project/EbookLib/
    # create chapters
    ch_epub = []
    for elm in range(1, final_ch + 1):
        chapter = epub.EpubHtml(title=f"{elm}",
                                file_name=f"chap_{elm:0{digits}d}.xhtml",
                                lang="en")
        logger.debug("elm: %s", elm)
        chapter.content = chp_cont[elm - 1]
        ch_epub.append(chapter)

    book = epub.EpubBook()
    # set metadata
    book.set_identifier(f"{title}-20200630")
    book.set_title(title)
    book.set_language('en')
    book.add_author('tmx2epub by mu@qq41947782')

    # add chapters
    for chapter in ch_epub:
        book.add_item(chapter)

    # define CSS style
    style = 'body { font-family: Times, Times New Roman, serif; }'

    nav_css = epub.EpubItem(
        uid="style_nav",
        file_name="style/nav.css",
        media_type="text/css",
        content=style,
    )

    # add CSS file
    book.add_item(nav_css)

    # One TOC section named after the book, listing every chapter.
    book.toc = ((epub.Section(title), ch_epub), )

    # basic spine: navigation page first, then the chapters in order
    book.spine = ["nav"]
    book.spine.extend(ch_epub)

    # add default NCX and Nav file
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    epub.write_epub(outfile, book)

    return outfile
Exemplo n.º 30
0
    def to_epub(self,
                filename: str,
                style: Type[Style] = AutumnStyle,
                font_size: int = 14) -> str:
        """
        Render the current Goosepaper to an epub file on disk.

        Stories are pulled from every provider in ``self.story_providers``.
        Stories with a headline become one chapter each, and only the first
        story seen for a given headline is kept. Stories without a headline
        are all kept and merged into a single trailing chapter titled
        "From Reddit".

        Arguments:
            filename: path of the epub file to write.
            style: accepted for interface compatibility.
                NOTE(review): this argument is not applied here; the
                stylesheet is always built from the base ``Style`` class.
                Confirm whether that is intended.
            font_size: font size passed to the stylesheet's ``get_css``.

        Returns:
            The ``filename`` that was written.
        """
        headlined = []
        no_headlines = []
        # Headlines already taken; assumes headlines are hashable (e.g. str).
        seen_headlines = set()
        for prov in self.story_providers:
            for story in prov.get_stories():
                if not story.headline:
                    no_headlines.append(story)
                elif story.headline not in seen_headlines:
                    seen_headlines.add(story.headline)
                    headlined.append(story)

        book = epub.EpubBook()
        book.set_title(f"{self.title} - {self.subtitle}")
        book.set_language("en")

        style_obj = Style()
        css = epub.EpubItem(
            uid="style_default",
            file_name="style/default.css",
            media_type="text/css",
            content=style_obj.get_css(font_size),
        )
        book.add_item(css)

        chapters = []
        for story in headlined:
            # Random file names keep chapters from colliding inside the epub.
            chapter = epub.EpubHtml(title=story.headline,
                                    file_name=f"{uuid4().hex}.xhtml",
                                    lang="en")
            chapter.content = story.to_html()
            book.add_item(chapter)
            chapters.append(chapter)

        if no_headlines:
            chapter = epub.EpubHtml(title="From Reddit",
                                    file_name=f"{uuid4().hex}.xhtml",
                                    lang="en")
            chapter.content = "<br>".join([s.to_html() for s in no_headlines])
            book.add_item(chapter)
            chapters.append(chapter)

        book.toc = chapters
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())
        book.spine = ["nav"] + chapters

        epub.write_epub(filename, book)
        return filename