def __call__(self, oeb, opts): import css_parser self.log = oeb.logger self.opts = opts self.oeb = oeb for item in oeb.manifest.items: self.current_item = item if etree.iselement(item.data): rewrite_links(self.current_item.data, self.url_replacer) elif hasattr(item.data, 'cssText'): css_parser.replaceUrls(item.data, self.url_replacer) if self.oeb.guide: for ref in self.oeb.guide.values(): href = urlnormalize(ref.href) href, frag = urllib.parse.urldefrag(href) replacement = self.rename_map.get(href, None) if replacement is not None: nhref = replacement if frag: nhref += '#' + frag ref.href = nhref if self.oeb.toc: self.fix_toc_entry(self.oeb.toc)
def fix_links(self): ''' Fix references to the split files in other content files. ''' for item in self.oeb.manifest: if etree.iselement(item.data): self.current_item = item base.rewrite_links(item.data, self.rewrite_links)
def mlize_spine(self, oeb_book): output = [''] for item in oeb_book.spine: self.log.debug('Converting %s to Markdown formatted TXT...' % item.href) self.rewrite_ids(item.data, item) rewrite_links(item.data, partial(self.rewrite_link, page=item)) stylizer = Stylizer(item.data, item.href, oeb_book, self.opts, self.opts.output_profile) output += self.dump_text(item.data.find(XHTML('body')), stylizer) output.append('\n\n') return ''.join(output)
def mlize_spine(self, oeb_book): output = [ u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /><title>%s</title></head><body>' % (prepare_string_for_xml(self.book_title)) ] for item in oeb_book.spine: self.log.debug('Converting %s to HTML...' % item.href) self.rewrite_ids(item.data, item) base.rewrite_links(item.data, partial(self.rewrite_link, page=item)) stylizer = Stylizer(item.data, item.href, oeb_book, self.opts) output += self.dump_text(item.data.find(base.tag('xhtml', 'body')), stylizer, item) output.append('\n\n') output.append('</body></html>') return ''.join(output)
def mlize_spine(self, oeb_book): output = [] for item in oeb_book.spine: self.log.debug('Converting %s to HTML...' % item.href) self.rewrite_ids(item.data, item) base.rewrite_links(item.data, partial(self.rewrite_link, page=item)) stylizer = Stylizer(item.data, item.href, oeb_book, self.opts) output += self.dump_text(item.data.find(base.tag('xhtml', 'body')), stylizer, item) output.append('\n\n') if self.opts.htmlz_class_style == 'external': css = u'<link href="style.css" rel="stylesheet" type="text/css" />' else: css = u'<style type="text/css">' + self.get_css( oeb_book) + u'</style>' title = u'<title>%s</title>' % prepare_string_for_xml(self.book_title) output = [u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" />'] + \ [css] + [title, u'</head><body>'] + output + [u'</body></html>'] return ''.join(output)
def create_oebbook(self, htmlpath, basedir, opts, log, mi): import uuid from ebook_converter.ebooks.conversion.plumber import create_oebbook from ebook_converter.ebooks.oeb.base import (DirContainer, rewrite_links, urlnormalize, BINARY_MIME, OEB_STYLES, xpath, urlquote) from ebook_converter.ebooks.oeb.transforms.metadata import \ meta_info_to_oeb_metadata from ebook_converter.ebooks.html.input import get_filelist from ebook_converter.ebooks.metadata import string_to_authors from ebook_converter.utils.localization import canonicalize_lang import css_parser, logging css_parser.log.setLevel(logging.WARN) self.OEB_STYLES = OEB_STYLES oeb = create_oebbook(log, None, opts, self, encoding=opts.input_encoding, populate=False) self.oeb = oeb metadata = oeb.metadata meta_info_to_oeb_metadata(mi, metadata, log) if not metadata.language: l = canonicalize_lang(getattr(opts, 'language', None)) if not l: oeb.logger.warn('Language not specified') l = get_lang().replace('_', '-') metadata.add('language', l) if not metadata.creator: a = getattr(opts, 'authors', None) if a: a = string_to_authors(a) if not a: oeb.logger.warn('Creator not specified') a = [self.oeb.translate('Unknown')] for aut in a: metadata.add('creator', aut) if not metadata.title: oeb.logger.warn('Title not specified') metadata.add('title', self.oeb.translate('Unknown')) bookid = str(uuid.uuid4()) metadata.add('identifier', bookid, id='uuid_id', scheme='uuid') for ident in metadata.identifier: if 'id' in ident.attrib: self.oeb.uid = metadata.identifier[0] break filelist = get_filelist(htmlpath, basedir, opts, log) filelist = [f for f in filelist if not f.is_binary] htmlfile_map = {} for f in filelist: path = f.path oeb.container = DirContainer(os.path.dirname(path), log, ignore_opf=True) bname = os.path.basename(path) id, href = oeb.manifest.generate(id='html', href=sanitize_file_name(bname)) htmlfile_map[path] = href item = oeb.manifest.add(id, href, 'text/html') if path == htmlpath and '%' in path: bname = urlquote(bname) item.html_input_href = bname oeb.spine.add(item, True) self.added_resources = {} self.log = log self.log('Normalizing filename cases') for path, href in htmlfile_map.items(): self.added_resources[path] = href self.urlnormalize, self.DirContainer = urlnormalize, DirContainer self.urldefrag = urllib.parse.urldefrag self.BINARY_MIME = BINARY_MIME self.log('Rewriting HTML links') for f in filelist: path = f.path dpath = os.path.dirname(path) oeb.container = DirContainer(dpath, log, ignore_opf=True) href = htmlfile_map[path] try: item = oeb.manifest.hrefs[href] except KeyError: item = oeb.manifest.hrefs[urlnormalize(href)] rewrite_links(item.data, functools.partial(self.resource_adder, base=dpath)) for item in oeb.manifest.values(): if item.media_type in self.OEB_STYLES: dpath = None for path, href in self.added_resources.items(): if href == item.href: dpath = os.path.dirname(path) break css_parser.replaceUrls(item.data, functools.partial(self.resource_adder, base=dpath)) toc = self.oeb.toc self.oeb.auto_generated_toc = True titles = [] headers = [] for item in self.oeb.spine: if not item.linear: continue html = item.data title = ''.join(xpath(html, '/h:html/h:head/h:title/text()')) title = re.sub(r'\s+', ' ', title.strip()) if title: titles.append(title) headers.append('(unlabled)') for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'): expr = '/h:html/h:body//h:%s[position()=1]/text()' header = ''.join(xpath(html, expr % tag)) header = re.sub(r'\s+', ' ', header.strip()) if header: headers[-1] = header break use = titles if len(titles) > len(set(titles)): use = headers for title, item in zip(use, self.oeb.spine): if not item.linear: continue toc.add(title, item.href) oeb.container = DirContainer(os.getcwd(), oeb.log, ignore_opf=True) return oeb