def commit_nav_toc(container, toc, lang=None):
    '''
    Rebuild the ``toc`` nav element of the container's nav document from
    ``toc`` and store the updated document back into the container.

    :param container: The book container; it is queried for the existing nav
        document and receives the rewritten tree via ``container.replace``.
    :param toc: The table of contents tree. Each node is iterated for its
        children and read for ``title``, ``dest`` and ``frag``; the root is
        also read for ``toc_title``.
    :param lang: Accepted but not read by this implementation.
    '''
    from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree

    tocname = find_existing_nav_toc(container)
    if tocname is None:
        # No nav document yet: register a fresh manifest item for one
        new_item = container.generate_item('nav.xhtml', id_prefix='nav')
        new_item.set('properties', 'nav')
        tocname = container.href_to_name(
            new_item.get('href'), base=container.opf_name)

    try:
        root = container.parsed(tocname)
    except KeyError:
        template = P('templates/new_nav.html', data=True)
        root = container.parse_xhtml(template.decode('utf-8'))

    type_attr = '{%s}type' % EPUB_NS
    toc_navs = [
        el for el in root.iterdescendants(XHTML('nav'))
        if el.get(type_attr) == 'toc'
    ]
    # Only the first toc nav is kept, any further ones are removed
    for duplicate in toc_navs[1:]:
        extract(duplicate)

    if not toc_navs:
        nav = root.makeelement(XHTML('nav'))
        first_child(root, XHTML('body')).append(nav)
    else:
        nav = toc_navs[0]
        # clear() also drops the tail and attributes, so put them back
        saved_tail, saved_attrib = nav.tail, dict(nav.attrib)
        nav.clear()
        nav.attrib.update(saved_attrib)
        nav.tail = saved_tail
    nav.set('{%s}type' % EPUB_NS, 'toc')

    if toc.toc_title:
        heading = nav.makeelement(XHTML('h1'))
        nav.append(heading)
        nav[-1].text = toc.toc_title

    rnode = nav.makeelement(XHTML('ol'))
    nav.append(rnode)
    whitespace_run = re.compile(r'\s+')

    def to_href(name):
        return container.name_to_href(name, base=tocname)

    def emit_entries(parent_ol, toc_node):
        # One <li> per child; children with sub-entries get a nested <ol>
        for entry in toc_node:
            item = parent_ol.makeelement(XHTML('li'))
            parent_ol.append(item)
            label = whitespace_run.sub(' ', entry.title or '').strip()
            tag = 'a' if entry.dest else 'span'
            anchor = item.makeelement(XHTML(tag))
            anchor.text = label
            item.append(anchor)
            if entry.dest:
                target = to_href(entry.dest)
                if entry.frag:
                    target = target + '#' + entry.frag
                anchor.set('href', target)
            if len(entry):
                nested = item.makeelement(XHTML('ol'))
                item.append(nested)
                emit_entries(nested, entry)

    emit_entries(rnode, toc)
    pretty_xml_tree(rnode)

    # Strip the whitespace the pretty printer put around a lone child of <li>
    for item in rnode.iterdescendants(XHTML('li')):
        if len(item) != 1:
            continue
        item.text = None
        item[0].tail = None

    container.replace(tocname, root)
def beautify_text(raw, syntax):
    '''
    Return a pretty-printed version of ``raw``.

    :param raw: The source text to beautify.
    :param syntax: ``'xml'`` and ``'css'`` are handled specially; any other
        value sends ``raw`` through the HTML parser and HTML pretty printer.
    :return: For CSS, the result of ``serialize(..., 'text/css')``. For XML
        and HTML, the tree serialized by ``etree.tostring`` as a text string.
    '''
    from lxml import etree
    from calibre.ebooks.oeb.polish.parsing import parse
    from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree, pretty_html_tree
    from calibre.ebooks.chardet import strip_encoding_declarations
    if syntax == 'xml':
        root = etree.fromstring(strip_encoding_declarations(raw))
        pretty_xml_tree(root)
    elif syntax == 'css':
        import logging
        from calibre.ebooks.oeb.base import serialize, _css_logger
        from calibre.ebooks.oeb.polish.utils import setup_css_parser_serialization
        from css_parser import CSSParser, log
        setup_css_parser_serialization(tprefs['editor_tab_stop_width'])
        log.setLevel(logging.WARN)
        log.raiseExceptions = False
        parser = CSSParser(loglevel=logging.WARNING,
                           # We don't care about @import rules
                           fetcher=lambda x: (None, None), log=_css_logger)
        data = parser.parseString(raw, href='<string>', validate=False)
        return serialize(data, 'text/css')
    else:
        root = parse(raw, line_numbers=False)
        pretty_html_tree(None, root)
    # The *string* 'unicode' asks lxml for text output on both Python 2 and
    # Python 3; the bare name ``unicode`` is only a builtin on Python 2.
    return etree.tostring(root, encoding='unicode')
def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.xhtml', toc_name='toc.ncx'):
    '''
    Create an empty book in the specified format at the specified location.

    :param mi: Metadata for the new book; used to build the OPF and read for
        ``title`` and ``authors`` when filling the HTML template.
    :param path: Where the book is written (made absolute first).
    :param fmt: ``'azw3'`` builds the book via ``opf_to_azw3``; every other
        value produces an EPUB zip.
    :param opf_name: Name of the OPF file inside the book.
    :param html_name: Name of the single HTML file inside the book.
    :param toc_name: Name of the NCX table of contents inside the book.
    '''
    path = os.path.abspath(path)
    lang = 'und'
    opf = metadata_to_opf(mi, as_string=False)
    # Use the first non-empty language element, falling back to 'und'
    for l in opf.xpath('//*[local-name()="language"]'):
        if l.text:
            lang = l.text
            break
    lang = lang_as_iso639_1(lang) or lang

    # Add a manifest (HTML file + NCX) and a one-item spine to the OPF
    opfns = OPF_NAMESPACES['opf']
    m = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, m)
    i = m.makeelement('{%s}item' % opfns, href=html_name, id='start')
    i.set('media-type', guess_type('a.xhtml'))
    m.append(i)
    i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx')
    i.set('media-type', guess_type(toc_name))
    m.append(i)
    s = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, s)
    i = s.makeelement('{%s}itemref' % opfns, idref='start')
    s.append(i)
    CONTAINER = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')
    HTML = P('templates/new_book.html', data=True).decode('utf-8').replace(
        '_LANGUAGE_', prepare_string_for_xml(lang, True)
    ).replace(
        '_TITLE_', prepare_string_for_xml(mi.title)
    ).replace(
        '_AUTHORS_', prepare_string_for_xml(authors_to_string(mi.authors))
    ).encode('utf-8')
    h = parse(HTML)
    pretty_html_tree(None, h)
    HTML = serialize(h, 'text/html')
    ncx = etree.tostring(create_toc(mi, opf, html_name, lang), encoding='utf-8', xml_declaration=True, pretty_print=True)
    pretty_xml_tree(opf)
    opf = etree.tostring(opf, encoding='utf-8', xml_declaration=True, pretty_print=True)
    if fmt == 'azw3':
        # Write the three files into a scratch directory and convert from there
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)):
                with open(name, 'wb') as f:
                    f.write(data)
            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull())
            opf_to_azw3(opf_name, path, c)
    else:
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)
            # 0o755 is valid octal syntax on Python 2.6+ and Python 3, whereas
            # the legacy spelling 0755 is a SyntaxError on Python 3
            zf.writestr('META-INF/', b'', 0o755)
            zf.writestr('META-INF/container.xml', CONTAINER)
            zf.writestr(opf_name, opf)
            zf.writestr(html_name, HTML)
            zf.writestr(toc_name, ncx)
def beautify_text(raw, syntax):
    '''
    Return a pretty-printed version of ``raw``.

    :param raw: The source text to beautify.
    :param syntax: ``'xml'`` and ``'css'`` are handled specially; any other
        value sends ``raw`` through the HTML parser and HTML pretty printer.
    :return: For CSS, the result of ``serialize(..., 'text/css')``. For XML
        and HTML, the tree serialized by ``etree.tostring`` as a text string.
    '''
    from lxml import etree
    from calibre.ebooks.oeb.polish.parsing import parse
    from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree, pretty_html_tree
    from calibre.ebooks.chardet import strip_encoding_declarations
    if syntax == 'xml':
        root = etree.fromstring(strip_encoding_declarations(raw))
        pretty_xml_tree(root)
    elif syntax == 'css':
        import logging
        from calibre.ebooks.oeb.base import serialize, _css_logger
        from calibre.ebooks.oeb.polish.utils import setup_cssutils_serialization
        from cssutils import CSSParser, log
        setup_cssutils_serialization(tprefs['editor_tab_stop_width'])
        log.setLevel(logging.WARN)
        log.raiseExceptions = False
        parser = CSSParser(loglevel=logging.WARNING,
                           # We don't care about @import rules
                           fetcher=lambda x: (None, None), log=_css_logger)
        data = parser.parseString(raw, href='<string>', validate=False)
        return serialize(data, 'text/css')
    else:
        root = parse(raw, line_numbers=False)
        pretty_html_tree(None, root)
    # The *string* 'unicode' asks lxml for text output on both Python 2 and
    # Python 3; the bare name ``unicode`` is only a builtin on Python 2.
    return etree.tostring(root, encoding='unicode')
def commit_nav_toc(container, toc, lang=None, landmarks=None, previous_nav=None):
    '''
    Write ``toc`` (and optionally landmarks and a page list) into the
    container's nav document and store the result back into the container.

    :param container: The book container holding the nav document.
    :param toc: The table of contents tree. Each node is iterated for its
        children and read for ``title``, ``dest`` and ``frag``; the root is
        also read for ``toc_title`` and ``page_list``.
    :param lang: Forwarded to ``ensure_container_has_nav``.
    :param landmarks: If not None, an iterable of landmark entries that is
        handed to ``set_landmarks`` to (re)build the ``landmarks`` nav.
    :param previous_nav: Forwarded to ``ensure_container_has_nav``.
    '''
    tocname, root = ensure_container_has_nav(container, lang=lang, previous_nav=previous_nav)
    nav = ensure_single_nav_of_type(root, 'toc')
    if toc.toc_title:
        nav.append(nav.makeelement(XHTML('h1')))
        nav[-1].text = toc.toc_title
    rnode = nav.makeelement(XHTML('ol'))
    nav.append(rnode)
    to_href = partial(container.name_to_href, base=tocname)
    spat = re.compile(r'\s+')

    def process_node(xml_parent, toc_parent):
        # One <li> per child; children with sub-entries get a nested <ol>
        for child in toc_parent:
            li = xml_parent.makeelement(XHTML('li'))
            xml_parent.append(li)
            title = child.title or ''
            title = spat.sub(' ', title).strip()
            # Entries without a destination become a plain <span>
            a = li.makeelement(XHTML('a' if child.dest else 'span'))
            a.text = title
            li.append(a)
            if child.dest:
                href = to_href(child.dest)
                if child.frag:
                    href += '#' + child.frag
                a.set('href', href)
            if len(child):
                ol = li.makeelement(XHTML('ol'))
                li.append(ol)
                process_node(ol, child)

    process_node(rnode, toc)
    pretty_xml_tree(nav)
    collapse_li(nav)
    nav.tail = '\n'

    # Previously the landmarks argument was accepted but never used, so
    # landmarks passed by callers were silently dropped.
    if landmarks is not None:
        set_landmarks(container, root, tocname, landmarks)

    if toc.page_list:
        nav = ensure_single_nav_of_type(root, 'page-list')
        nav.set('hidden', '')
        ol = nav.makeelement(XHTML('ol'))
        nav.append(ol)
        for entry in toc.page_list:
            # Only link to destinations that exist and are OEB documents
            if container.has_name(entry['dest']) and container.mime_map[
                    entry['dest']] in OEB_DOCS:
                a = create_nav_li(container, ol, entry, tocname)
                a.text = str(entry['pagenum'])
        pretty_xml_tree(nav)
        collapse_li(nav)
    container.replace(tocname, root)
def add_stylesheet_links(container, name, text):
    '''
    Parse ``text`` as XHTML and append a stylesheet ``<link>`` to its head
    for every manifest item whose media type is in ``OEB_STYLES``.

    :param container: The book container; used for parsing, for listing the
        stylesheets and for computing hrefs relative to ``name``.
    :param name: The name of the file that ``text`` belongs to.
    :param text: The markup to add the links to.
    :return: The serialized markup, or None when the document has no head
        element or the container has no stylesheets.
    '''
    root = container.parse_xhtml(text, name)
    heads = root.xpath('//*[local-name() = "head"]')
    if not heads:
        return None
    head = heads[0]

    def is_stylesheet(mt):
        return mt in OEB_STYLES

    stylesheet_names = tuple(container.manifest_items_of_type(is_stylesheet))
    if not stylesheet_names:
        return None

    for sheet_name in stylesheet_names:
        head.append(head.makeelement(
            XHTML('link'),
            type='text/css',
            rel='stylesheet',
            href=container.name_to_href(sheet_name, name),
        ))
    pretty_xml_tree(head)
    return serialize(root, 'text/html')
def set_landmarks(container, root, tocname, landmarks):
    '''
    Fill the ``landmarks`` nav element of ``root`` with one list item per
    usable entry in ``landmarks``.

    An entry is used only when it has a truthy ``type`` and its ``dest``
    names a file in the container whose media type is in ``OEB_DOCS``.

    :param container: The book container the destinations are checked against.
    :param root: The root of the nav document tree; modified in place.
    :param tocname: Name of the nav document, passed on to ``create_nav_li``.
    :param landmarks: Iterable of mappings read for ``type``, ``dest`` and
        ``title``.
    '''
    nav = ensure_single_nav_of_type(root, 'landmarks')
    nav.set('hidden', '')
    listing = nav.makeelement(XHTML('ol'))
    nav.append(listing)

    for landmark in landmarks:
        if not landmark['type']:
            continue
        dest = landmark['dest']
        if not container.has_name(dest):
            continue
        if container.mime_map[dest] not in OEB_DOCS:
            continue
        anchor = create_nav_li(container, listing, landmark, tocname)
        anchor.set('{%s}type' % EPUB_NS, landmark['type'])
        # An empty title is stored as no text at all
        anchor.text = landmark['title'] or None

    pretty_xml_tree(nav)
    collapse_li(nav)
def pretty_print_opf(root):
    '''
    Pretty print the OPF tree ``root`` in place by running ``pretty_opf``
    followed by ``pretty_xml_tree`` over it.
    '''
    from calibre.ebooks.oeb.polish.pretty import pretty_opf, pretty_xml_tree

    for prettify in (pretty_opf, pretty_xml_tree):
        prettify(root)
def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.xhtml', toc_name='toc.ncx'):
    '''
    Create an empty book in the specified format at the specified location.

    :param mi: Metadata for the new book.
    :param path: Where the book is written.
    :param fmt: Must be one of ``valid_empty_formats``. ``'txt'`` and
        ``'docx'`` are written directly, ``'azw3'`` goes through
        ``opf_to_azw3`` and anything else is written as an EPUB zip.
    :param opf_name: Name of the OPF file inside the book.
    :param html_name: Name of the single HTML file inside the book.
    :param toc_name: Name of the NCX table of contents inside the book.
    :raises ValueError: If ``fmt`` is not in ``valid_empty_formats``.
    '''
    if fmt not in valid_empty_formats:
        raise ValueError('Cannot create empty book in the %s format' % fmt)

    if fmt == 'txt':
        # A text "book" is just the title, or an empty file if there is none
        with open(path, 'wb') as out:
            if not mi.is_null('title'):
                out.write(as_bytes(mi.title))
        return

    if fmt == 'docx':
        from calibre.ebooks.conversion.plumber import Plumber
        from calibre.ebooks.docx.writer.container import DOCX
        from calibre.utils.logging import default_log
        plumber = Plumber('a.docx', 'b.docx', default_log)
        plumber.setup_options()
        # Use the word default of one inch page margins
        for side in ('left', 'right', 'top', 'bottom'):
            setattr(plumber.opts, 'margin_' + side, 72)
        DOCX(plumber.opts, default_log).write(path, mi, create_empty_document=True)
        return

    path = os.path.abspath(path)
    opf = metadata_to_opf(mi, as_string=False)
    # First non-empty language element wins, 'und' when there is none
    lang = next(
        (node.text for node in opf.xpath('//*[local-name()="language"]') if node.text),
        'und',
    )
    lang = lang_as_iso639_1(lang) or lang

    opfns = OPF_NAMESPACES['opf']
    item_tag = '{%s}item' % opfns

    manifest = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, manifest)
    # (href, id, name used to guess the media type)
    manifest_entries = (
        (html_name, 'start', 'a.xhtml'),
        (toc_name, 'ncx', toc_name),
    )
    for href, item_id, type_source in manifest_entries:
        item = manifest.makeelement(item_tag, href=href, id=item_id)
        item.set('media-type', guess_type(type_source))
        manifest.append(item)

    spine = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, spine)
    spine.append(spine.makeelement('{%s}itemref' % opfns, idref='start'))

    container_xml = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')

    # Fill the placeholders of the template one at a time
    markup = P('templates/new_book.html', data=True).decode('utf-8')
    markup = markup.replace('_LANGUAGE_', prepare_string_for_xml(lang, True))
    markup = markup.replace('_TITLE_', prepare_string_for_xml(mi.title))
    markup = markup.replace(
        '_AUTHORS_', prepare_string_for_xml(authors_to_string(mi.authors)))
    html_tree = parse(markup.encode('utf-8'))
    pretty_html_tree(None, html_tree)
    html_data = serialize(html_tree, 'text/html')

    xml_options = dict(encoding='utf-8', xml_declaration=True, pretty_print=True)
    ncx_data = etree.tostring(create_toc(mi, opf, html_name, lang), **xml_options)
    pretty_xml_tree(opf)
    opf_data = etree.tostring(opf, **xml_options)

    if fmt != 'azw3':
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)
            zf.writestr('META-INF/', b'', 0o755)
            zf.writestr('META-INF/container.xml', container_xml)
            zf.writestr(opf_name, opf_data)
            zf.writestr(html_name, html_data)
            zf.writestr(toc_name, ncx_data)
        return

    # azw3: write the pieces into a scratch directory and convert from there
    with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
        pieces = ((opf_name, opf_data), (html_name, html_data), (toc_name, ncx_data))
        for piece_name, payload in pieces:
            with open(piece_name, 'wb') as out:
                out.write(payload)
        book = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull())
        opf_to_azw3(opf_name, path, book)
def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.xhtml', toc_name='toc.ncx'):
    '''
    Create an empty book in the specified format at the specified location.

    :param mi: Metadata for the new book.
    :param path: Where the book is written.
    :param fmt: Must be one of ``valid_empty_formats``. ``'txt'`` and
        ``'docx'`` are written directly, ``'azw3'`` goes through
        ``opf_to_azw3`` and anything else is written as an EPUB zip.
    :param opf_name: Name of the OPF file inside the book.
    :param html_name: Name of the single HTML file inside the book.
    :param toc_name: Name of the NCX table of contents inside the book.
    :raises ValueError: If ``fmt`` is not in ``valid_empty_formats``.
    '''
    if fmt not in valid_empty_formats:
        raise ValueError('Cannot create empty book in the %s format' % fmt)
    if fmt == 'txt':
        with open(path, 'wb') as f:
            if not mi.is_null('title'):
                title = mi.title
                # The file is opened in binary mode, so a text title has to be
                # encoded explicitly: writing it as is fails on Python 3 and,
                # for non-ASCII titles, on Python 2 as well
                if not isinstance(title, bytes):
                    title = title.encode('utf-8')
                f.write(title)
        return
    if fmt == 'docx':
        from calibre.ebooks.conversion.plumber import Plumber
        from calibre.ebooks.docx.writer.container import DOCX
        from calibre.utils.logging import default_log
        p = Plumber('a.docx', 'b.docx', default_log)
        p.setup_options()
        # Use the word default of one inch page margins
        for x in 'left right top bottom'.split():
            setattr(p.opts, 'margin_' + x, 72)
        DOCX(p.opts, default_log).write(path, mi, create_empty_document=True)
        return
    path = os.path.abspath(path)
    lang = 'und'
    opf = metadata_to_opf(mi, as_string=False)
    # Use the first non-empty language element, falling back to 'und'
    for l in opf.xpath('//*[local-name()="language"]'):
        if l.text:
            lang = l.text
            break
    lang = lang_as_iso639_1(lang) or lang

    # Add a manifest (HTML file + NCX) and a one-item spine to the OPF
    opfns = OPF_NAMESPACES['opf']
    m = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, m)
    i = m.makeelement('{%s}item' % opfns, href=html_name, id='start')
    i.set('media-type', guess_type('a.xhtml'))
    m.append(i)
    i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx')
    i.set('media-type', guess_type(toc_name))
    m.append(i)
    s = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, s)
    i = s.makeelement('{%s}itemref' % opfns, idref='start')
    s.append(i)
    CONTAINER = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')
    HTML = P('templates/new_book.html', data=True).decode('utf-8').replace(
        '_LANGUAGE_', prepare_string_for_xml(lang, True)
    ).replace(
        '_TITLE_', prepare_string_for_xml(mi.title)
    ).replace(
        '_AUTHORS_', prepare_string_for_xml(authors_to_string(mi.authors))
    ).encode('utf-8')
    h = parse(HTML)
    pretty_html_tree(None, h)
    HTML = serialize(h, 'text/html')
    ncx = etree.tostring(create_toc(mi, opf, html_name, lang), encoding='utf-8', xml_declaration=True, pretty_print=True)
    pretty_xml_tree(opf)
    opf = etree.tostring(opf, encoding='utf-8', xml_declaration=True, pretty_print=True)
    if fmt == 'azw3':
        # Write the three files into a scratch directory and convert from there
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)):
                with open(name, 'wb') as f:
                    f.write(data)
            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull())
            opf_to_azw3(opf_name, path, c)
    else:
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)
            # 0o755 is valid octal syntax on Python 2.6+ and Python 3, whereas
            # the legacy spelling 0755 is a SyntaxError on Python 3
            zf.writestr('META-INF/', b'', 0o755)
            zf.writestr('META-INF/container.xml', CONTAINER)
            zf.writestr(opf_name, opf)
            zf.writestr(html_name, HTML)
            zf.writestr(toc_name, ncx)
def commit_nav_toc(container, toc, lang=None, landmarks=None, previous_nav=None):
    '''
    Write ``toc`` (and optionally landmarks and a page list) into the
    container's nav document and store the result back into the container.

    :param container: The book container holding (or receiving) the nav
        document.
    :param toc: The table of contents tree. Each node is iterated for its
        children and read for ``title``, ``dest`` and ``frag``; the root is
        also read for ``toc_title`` and ``page_list``.
    :param lang: If truthy, set as ``lang`` and ``xml:lang`` on the root of
        the nav document (converted with ``lang_as_iso639_1`` when possible).
    :param landmarks: If not None, an iterable of mappings read for ``type``,
        ``dest``, ``frag`` and ``title`` from which the landmarks nav is built.
    :param previous_nav: If not None, a sequence whose first item is an href
        and whose second item is a parsed tree. The href is preferred as the
        nav document when it exists in the container; the tree is used when a
        new nav document has to be created.
    '''
    from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree

    def strip_li_whitespace(parent):
        # Drop pretty-printer whitespace around a lone child of each <li>
        for item in parent.iterdescendants(XHTML('li')):
            if len(item) != 1:
                continue
            item.text = None
            item[0].tail = None

    def linked_li(listing, entry):
        # Append <li><a href=...></a></li> to listing and return the <a>
        item = listing.makeelement(XHTML('li'))
        listing.append(item)
        anchor = item.makeelement(XHTML('a'))
        item.append(anchor)
        target = container.name_to_href(entry['dest'], tocname)
        if entry['frag']:
            target = target + '#' + entry['frag']
        anchor.set('href', target)
        return anchor

    def hidden_nav(kind):
        # Get the single nav of this type, hide it and give it an empty <ol>
        element = ensure_single_nav_of_type(root, kind)
        element.set('hidden', '')
        listing = element.makeelement(XHTML('ol'))
        element.append(listing)
        return element, listing

    def is_document(dest):
        return container.has_name(dest) and container.mime_map[dest] in OEB_DOCS

    # Work out which file is the nav document, creating one if needed
    tocname = find_existing_nav_toc(container)
    if previous_nav is not None:
        candidate = container.href_to_name(previous_nav[0])
        if candidate and container.exists(candidate):
            tocname = candidate
            container.apply_unique_properties(tocname, 'nav')

    if tocname is not None:
        root = container.parsed(tocname)
    else:
        new_item = container.generate_item('nav.xhtml', id_prefix='nav')
        new_item.set('properties', 'nav')
        tocname = container.href_to_name(
            new_item.get('href'), base=container.opf_name)
        if previous_nav is None:
            template = P('templates/new_nav.html', data=True)
            root = container.parse_xhtml(template.decode('utf-8'))
        else:
            root = previous_nav[1]
        container.replace(tocname, root)

    if lang:
        lang = lang_as_iso639_1(lang) or lang
        root.set('lang', lang)
        root.set('{%s}lang' % XML_NS, lang)

    # The table of contents itself
    nav = ensure_single_nav_of_type(root, 'toc')
    if toc.toc_title:
        heading = nav.makeelement(XHTML('h1'))
        nav.append(heading)
        nav[-1].text = toc.toc_title
    rnode = nav.makeelement(XHTML('ol'))
    nav.append(rnode)
    whitespace_run = re.compile(r'\s+')

    def to_href(name):
        return container.name_to_href(name, base=tocname)

    def emit_entries(parent_ol, toc_node):
        # One <li> per child; children with sub-entries get a nested <ol>
        for entry in toc_node:
            item = parent_ol.makeelement(XHTML('li'))
            parent_ol.append(item)
            label = whitespace_run.sub(' ', entry.title or '').strip()
            tag = 'a' if entry.dest else 'span'
            anchor = item.makeelement(XHTML(tag))
            anchor.text = label
            item.append(anchor)
            if entry.dest:
                target = to_href(entry.dest)
                if entry.frag:
                    target = target + '#' + entry.frag
                anchor.set('href', target)
            if len(entry):
                nested = item.makeelement(XHTML('ol'))
                item.append(nested)
                emit_entries(nested, entry)

    emit_entries(rnode, toc)
    pretty_xml_tree(nav)
    strip_li_whitespace(nav)
    nav.tail = '\n'

    if landmarks is not None:
        nav, listing = hidden_nav('landmarks')
        for landmark in landmarks:
            if not landmark['type']:
                continue
            if not is_document(landmark['dest']):
                continue
            anchor = linked_li(listing, landmark)
            anchor.set('{%s}type' % EPUB_NS, landmark['type'])
            anchor.text = landmark['title'] or None
        pretty_xml_tree(nav)
        strip_li_whitespace(nav)

    if toc.page_list:
        nav, listing = hidden_nav('page-list')
        for page in toc.page_list:
            if not is_document(page['dest']):
                continue
            anchor = linked_li(listing, page)
            anchor.text = str(page['pagenum'])
        pretty_xml_tree(nav)
        strip_li_whitespace(nav)

    container.replace(tocname, root)
def commit_nav_toc(container, toc, lang=None, landmarks=None, previous_nav=None):
    '''
    Write ``toc`` (and optionally landmarks and a page list) into the
    container's nav document and store the result back into the container.

    :param container: The book container holding (or receiving) the nav
        document.
    :param toc: The table of contents tree. Each node is iterated for its
        children and read for ``title``, ``dest`` and ``frag``; the root is
        also read for ``toc_title`` and ``page_list``.
    :param lang: If truthy, set as ``lang`` and ``xml:lang`` on the root of
        the nav document (converted with ``lang_as_iso639_1`` when possible).
    :param landmarks: If not None, an iterable of mappings read for ``type``,
        ``dest``, ``frag`` and ``title`` from which the landmarks nav is built.
    :param previous_nav: If not None, a sequence whose first item is an href
        and whose second item is a parsed tree. The href is preferred as the
        nav document when it exists in the container; the tree is used when a
        new nav document has to be created.
    '''
    from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree

    nav_type_attr = '{%s}type' % EPUB_NS

    # --- locate or create the nav document --------------------------------
    tocname = find_existing_nav_toc(container)
    if previous_nav is not None:
        previous_name = container.href_to_name(previous_nav[0])
        if previous_name and container.exists(previous_name):
            tocname = previous_name
            container.apply_unique_properties(tocname, 'nav')

    if tocname is None:
        manifest_item = container.generate_item('nav.xhtml', id_prefix='nav')
        manifest_item.set('properties', 'nav')
        tocname = container.href_to_name(
            manifest_item.get('href'), base=container.opf_name)
        if previous_nav is not None:
            root = previous_nav[1]
        else:
            raw_template = P('templates/new_nav.html', data=True)
            root = container.parse_xhtml(raw_template.decode('utf-8'))
        container.replace(tocname, root)
    else:
        root = container.parsed(tocname)

    if lang:
        lang = lang_as_iso639_1(lang) or lang
        for attr_name in ('lang', '{%s}lang' % XML_NS):
            root.set(attr_name, lang)

    # --- helpers shared by the nav sections -------------------------------
    def tidy(element):
        # Pretty print, then drop the whitespace around a lone <li> child
        pretty_xml_tree(element)
        for li in element.iterdescendants(XHTML('li')):
            if len(li) == 1:
                li.text, li[0].tail = None, None

    def add_link(ol, entry):
        # Append <li><a href=...></a></li> to ol and return the <a>
        li = ol.makeelement(XHTML('li'))
        ol.append(li)
        link = li.makeelement(XHTML('a'))
        li.append(link)
        href = container.name_to_href(entry['dest'], tocname)
        fragment = entry['frag']
        if fragment:
            href = href + '#' + fragment
        link.set('href', href)
        return link

    def points_to_document(entry):
        dest = entry['dest']
        return container.has_name(dest) and container.mime_map[dest] in OEB_DOCS

    # --- table of contents -------------------------------------------------
    nav = ensure_single_nav_of_type(root, 'toc')
    if toc.toc_title:
        nav.append(nav.makeelement(XHTML('h1')))
        nav[-1].text = toc.toc_title
    rnode = nav.makeelement(XHTML('ol'))
    nav.append(rnode)
    spaces = re.compile(r'\s+')

    def build(xml_parent, toc_parent):
        # One <li> per child; children with sub-entries get a nested <ol>
        for node in toc_parent:
            li = xml_parent.makeelement(XHTML('li'))
            xml_parent.append(li)
            text = spaces.sub(' ', node.title or '').strip()
            if node.dest:
                label = li.makeelement(XHTML('a'))
            else:
                label = li.makeelement(XHTML('span'))
            label.text = text
            li.append(label)
            if node.dest:
                href = container.name_to_href(node.dest, base=tocname)
                if node.frag:
                    href = href + '#' + node.frag
                label.set('href', href)
            if len(node):
                sub_list = li.makeelement(XHTML('ol'))
                li.append(sub_list)
                build(sub_list, node)

    build(rnode, toc)
    tidy(nav)
    nav.tail = '\n'

    # --- landmarks ----------------------------------------------------------
    if landmarks is not None:
        nav = ensure_single_nav_of_type(root, 'landmarks')
        nav.set('hidden', '')
        ol = nav.makeelement(XHTML('ol'))
        nav.append(ol)
        for entry in landmarks:
            if entry['type'] and points_to_document(entry):
                link = add_link(ol, entry)
                link.set(nav_type_attr, entry['type'])
                link.text = entry['title'] or None
        tidy(nav)

    # --- page list ----------------------------------------------------------
    if toc.page_list:
        nav = ensure_single_nav_of_type(root, 'page-list')
        nav.set('hidden', '')
        ol = nav.makeelement(XHTML('ol'))
        nav.append(ol)
        for entry in toc.page_list:
            if points_to_document(entry):
                add_link(ol, entry).text = str(entry['pagenum'])
        tidy(nav)

    container.replace(tocname, root)