def flatten_node(self, node, stylizer, names, styles, pseudo_styles, psize, item_id): if not isinstance(node.tag, string_or_bytes) \ or namespace(node.tag) != XHTML_NS: return tag = barename(node.tag) style = stylizer.style(node) cssdict = style.cssdict() try: font_size = style['font-size'] except: font_size = self.sbase if self.sbase is not None else \ self.context.source.fbase if tag == 'body' and isinstance(font_size, numbers.Number): stylizer.body_font_size = font_size if 'align' in node.attrib: if tag != 'img': cssdict['text-align'] = node.attrib['align'] if cssdict['text-align'] == 'center': # align=center causes tables to be center aligned, # which text-align does not. And the ever trustworthy Word # uses this construct in its HTML output. See # https://bugs.launchpad.net/bugs/1569583 if tag == 'table': if 'margin-left' not in cssdict and 'margin-right' not in cssdict: cssdict['margin-left'] = cssdict[ 'margin-right'] = 'auto' else: for table in node.iterchildren(XHTML("table")): ts = stylizer.style(table) if ts.get('margin-left') is None and ts.get( 'margin-right') is None: ts.set('margin-left', 'auto') ts.set('margin-right', 'auto') else: val = node.attrib['align'] if val in ('middle', 'bottom', 'top'): cssdict['vertical-align'] = val elif val in ('left', 'right'): cssdict['float'] = val del node.attrib['align'] if 'valign' in node.attrib and tag == 'td': if cssdict.get('vertical-align') == 'inherit': cssdict['vertical-align'] = node.attrib['valign'] del node.attrib['valign'] if node.tag == XHTML('font'): tags = [ 'descendant::h:%s' % x for x in ('p', 'div', 'table', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'dl', 'blockquote') ] tag = 'div' if XPath('|'.join(tags))(node) else 'span' node.tag = XHTML(tag) if 'size' in node.attrib: def force_int(raw): return int(re.search(r'([0-9+-]+)', raw).group(1)) size = node.attrib['size'].strip() if size: fnums = self.context.source.fnums if size[0] in ('+', '-'): # Oh, the warcrimes try: esize = 3 + force_int(size) except: esize = 3 if esize < 1: esize = 1 if esize > 7: esize = 7 font_size = fnums[esize] else: try: font_size = fnums[force_int(size)] except: font_size = fnums[3] cssdict['font-size'] = '%.1fpt' % font_size del node.attrib['size'] if 'face' in node.attrib: cssdict['font-family'] = node.attrib['face'] del node.attrib['face'] if 'color' in node.attrib: try: cssdict['color'] = Property('color', node.attrib['color']).value except (ValueError, SyntaxErr): pass del node.attrib['color'] if 'bgcolor' in node.attrib: try: cssdict['background-color'] = Property( 'background-color', node.attrib['bgcolor']).value except (ValueError, SyntaxErr): pass del node.attrib['bgcolor'] if tag == 'ol' and 'type' in node.attrib: del node.attrib['type'] if cssdict.get('font-weight', '').lower() == 'medium': cssdict[ 'font-weight'] = 'normal' # ADE chokes on font-weight medium fsize = font_size is_drop_cap = ( cssdict.get('float', None) == 'left' and 'font-size' in cssdict and len(node) == 0 and node.text and (len(node.text) == 1 or (len(node.text) == 2 and 0x2000 <= ord(node.text[0]) <= 0x206f))) # Detect drop caps generated by the docx input plugin if node.tag and node.tag.endswith('}p') and len(node) == 0 and node.text and len(node.text.strip()) == 1 and \ not node.tail and 'line-height' in cssdict and 'font-size' in cssdict: dp = node.getparent() if dp.tag and dp.tag.endswith('}div') and len( dp) == 1 and not dp.text: if stylizer.style(dp).cssdict().get('float', None) == 'left': is_drop_cap = True if not self.context.disable_font_rescaling and not is_drop_cap: _sbase = self.sbase if self.sbase is not None else \ self.context.source.fbase dyn_rescale = node.attrib.pop('data-calibre-rescale', None) if dyn_rescale is not None: try: dyn_rescale = float(dyn_rescale) / 100.0 except Exception: dyn_rescale = 1 fsize = self.fmap[_sbase] fsize *= dyn_rescale cssdict['font-size'] = '%0.5fem' % (fsize / psize) psize = fsize elif 'font-size' in cssdict or tag == 'body': fsize = self.fmap[font_size] try: cssdict['font-size'] = "%0.5fem" % (fsize / psize) except ZeroDivisionError: cssdict['font-size'] = '%.1fpt' % fsize psize = fsize try: minlh = self.context.minimum_line_height / 100. if not is_drop_cap and style['line-height'] < minlh * fsize: cssdict['line-height'] = str(minlh) except: self.oeb.logger.exception('Failed to set minimum line-height') if cssdict: for x in self.filter_css: popval = cssdict.pop(x, None) if self.body_font_family and popval and x == 'font-family' \ and popval.partition(',')[0][1:-1] == self.body_font_family.partition(',')[0][1:-1]: cssdict[x] = popval if cssdict: if self.lineh and self.fbase and tag != 'body': self.clean_edges(cssdict, style, psize) if 'display' in cssdict and cssdict['display'] == 'in-line': cssdict['display'] = 'inline' if self.unfloat and 'float' in cssdict \ and cssdict.get('display', 'none') != 'none': del cssdict['display'] if self.untable and 'display' in cssdict \ and cssdict['display'].startswith('table'): display = cssdict['display'] if display == 'table-cell': cssdict['display'] = 'inline' else: cssdict['display'] = 'block' if 'vertical-align' in cssdict \ and cssdict['vertical-align'] == 'sup': cssdict['vertical-align'] = 'super' if self.lineh and 'line-height' not in cssdict: lineh = self.lineh / psize cssdict['line-height'] = "%0.5fem" % lineh if (self.context.remove_paragraph_spacing or self.context.insert_blank_line) and tag in ('p', 'div'): if item_id != 'calibre_jacket' or self.context.output_profile.name == 'Kindle': for prop in ('margin', 'padding', 'border'): for edge in ('top', 'bottom'): cssdict['%s-%s' % (prop, edge)] = '0pt' if self.context.insert_blank_line: cssdict['margin-top'] = cssdict['margin-bottom'] = \ '%fem'%self.context.insert_blank_line_size indent_size = self.context.remove_paragraph_spacing_indent_size keep_indents = indent_size < 0.0 if (self.context.remove_paragraph_spacing and not keep_indents and cssdict.get('text-align', None) not in ('center', 'right')): cssdict['text-indent'] = "%1.1fem" % indent_size pseudo_classes = style.pseudo_classes(self.filter_css) if cssdict or pseudo_classes: keep_classes = set() if cssdict: items = sorted(iteritems(cssdict)) css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items) classes = node.get('class', '').strip() or 'calibre' classes_list = classes.split() # lower() because otherwise if the document uses the same class # name with different case, both cases will apply, leading # to incorrect results. klass = ascii_text(STRIPNUM.sub( '', classes_list[0])).lower().strip().replace(' ', '_') if css in styles: match = styles[css] else: match = klass + str(names[klass] or '') styles[css] = match names[klass] += 1 node.attrib['class'] = match keep_classes.add(match) for psel, cssdict in iteritems(pseudo_classes): items = sorted(iteritems(cssdict)) css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items) pstyles = pseudo_styles[psel] if css in pstyles: match = pstyles[css] else: # We have to use a different class for each psel as # otherwise you can have incorrect styles for a situation # like: a:hover { color: red } a:link { color: blue } a.x:hover { color: green } # If the pcalibre class for a:hover and a:link is the same, # then the class attribute for a.x tags will contain both # that class and the class for a.x:hover, which is wrong. klass = 'pcalibre' match = klass + str(names[klass] or '') pstyles[css] = match names[klass] += 1 keep_classes.add(match) node.attrib['class'] = ' '.join(keep_classes) elif 'class' in node.attrib: del node.attrib['class'] if 'style' in node.attrib: del node.attrib['style'] for child in node: self.flatten_node(child, stylizer, names, styles, pseudo_styles, psize, item_id)
def process_nav_node(container, node, toc_parent, nav_name): for li in node.iterchildren(XHTML('li')): child = add_from_li(container, li, toc_parent, nav_name) ol = first_child(li, XHTML('ol')) if child is not None and ol is not None: process_nav_node(container, ol, child, nav_name)
def __init__(self, tree, path, oeb, opts, profile=None, extra_css='', user_css='', base_css=''): self.oeb, self.opts = oeb, opts self.profile = profile if self.profile is None: # Use the default profile. This should really be using # opts.output_profile, but I don't want to risk changing it, as # doing so might well have hard to debug font size effects. from calibre.customize.ui import output_profiles for x in output_profiles(): if x.short_name == 'default': self.profile = x break if self.profile is None: # Just in case the default profile is removed in the future :) self.profile = opts.output_profile self.body_font_size = self.profile.fbase self.logger = oeb.logger item = oeb.manifest.hrefs[path] basename = os.path.basename(path) cssname = os.path.splitext(basename)[0] + '.css' stylesheets = [html_css_stylesheet()] if base_css: stylesheets.append(parseString(base_css, validate=False)) style_tags = xpath(tree, '//*[local-name()="style" or local-name()="link"]') # Add css_parser parsing profiles from output_profile for profile in self.opts.output_profile.extra_css_modules: cssprofiles.addProfile(profile['name'], profile['props'], profile['macros']) parser = CSSParser(fetcher=self._fetch_css_file, log=logging.getLogger('calibre.css')) for elem in style_tags: if (elem.tag == XHTML('style') and elem.get('type', CSS_MIME) in OEB_STYLES and media_ok(elem.get('media'))): text = elem.text if elem.text else '' for x in elem: t = getattr(x, 'text', None) if t: text += '\n\n' + force_unicode(t, 'utf-8') t = getattr(x, 'tail', None) if t: text += '\n\n' + force_unicode(t, 'utf-8') if text: text = oeb.css_preprocessor(text) # We handle @import rules separately parser.setFetcher(lambda x: ('utf-8', b'')) stylesheet = parser.parseString(text, href=cssname, validate=False) parser.setFetcher(self._fetch_css_file) for rule in stylesheet.cssRules: if rule.type == rule.IMPORT_RULE: ihref = item.abshref(rule.href) if not media_ok(rule.media.mediaText): continue hrefs = self.oeb.manifest.hrefs if ihref not in hrefs: self.logger.warn( 'Ignoring missing stylesheet in @import rule:', rule.href) continue sitem = hrefs[ihref] if sitem.media_type not in OEB_STYLES: self.logger.warn( 'CSS @import of non-CSS file %r' % rule.href) continue stylesheets.append(sitem.data) # Make links to resources absolute, since these rules will # be folded into a stylesheet at the root replaceUrls(stylesheet, item.abshref, ignoreImportRules=True) stylesheets.append(stylesheet) elif (elem.tag == XHTML('link') and elem.get('href') and elem.get('rel', 'stylesheet').lower() == 'stylesheet' and elem.get('type', CSS_MIME).lower() in OEB_STYLES and media_ok(elem.get('media'))): href = urlnormalize(elem.attrib['href']) path = item.abshref(href) sitem = oeb.manifest.hrefs.get(path, None) if sitem is None: self.logger.warn( 'Stylesheet %r referenced by file %r not in manifest' % (path, item.href)) continue if not hasattr(sitem.data, 'cssRules'): self.logger.warn( 'Stylesheet %r referenced by file %r is not CSS' % (path, item.href)) continue stylesheets.append(sitem.data) csses = {'extra_css': extra_css, 'user_css': user_css} for w, x in csses.items(): if x: try: text = x stylesheet = parser.parseString(text, href=cssname, validate=False) stylesheets.append(stylesheet) except Exception: self.logger.exception('Failed to parse %s, ignoring.' % w) self.logger.debug('Bad css: ') self.logger.debug(x) # using oeb to store the rules, page rule and font face rules # and generating them again if opts, profile or stylesheets are different if (not hasattr(self.oeb, 'stylizer_rules')) \ or not self.oeb.stylizer_rules.same_rules(self.opts, self.profile, stylesheets): self.oeb.stylizer_rules = StylizerRules(self.opts, self.profile, stylesheets) self.rules = self.oeb.stylizer_rules.rules self.page_rule = self.oeb.stylizer_rules.page_rule self.font_face_rules = self.oeb.stylizer_rules.font_face_rules self.flatten_style = self.oeb.stylizer_rules.flatten_style self._styles = {} pseudo_pat = re.compile( ':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I) select = Select(tree, ignore_inappropriate_pseudo_classes=True) for _, _, cssdict, text, _ in self.rules: fl = pseudo_pat.search(text) try: matches = tuple(select(text)) except SelectorError as err: self.logger.error( 'Ignoring CSS rule with invalid selector: %r (%s)' % (text, as_unicode(err))) continue if fl is not None: fl = fl.group(1) if fl == 'first-letter' and getattr( self.oeb, 'plumber_output_format', '').lower() in {'mobi', 'docx'}: # Fake first-letter for elem in matches: for x in elem.iter('*'): if x.text: punctuation_chars = [] text = unicode_type(x.text) while text: category = unicodedata.category(text[0]) if category[0] not in {'P', 'Z'}: break punctuation_chars.append(text[0]) text = text[1:] special_text = ''.join(punctuation_chars) + \ (text[0] if text else '') span = x.makeelement('{%s}span' % XHTML_NS) span.text = special_text span.set('data-fake-first-letter', '1') span.tail = text[1:] x.text = None x.insert(0, span) self.style(span)._update_cssdict(cssdict) break else: # Element pseudo-class for elem in matches: self.style(elem)._update_pseudo_class(fl, cssdict) else: for elem in matches: self.style(elem)._update_cssdict(cssdict) for elem in xpath(tree, '//h:*[@style]'): self.style(elem)._apply_style_attr(url_replacer=item.abshref) num_pat = re.compile(r'[0-9.]+$') for elem in xpath(tree, '//h:img[@width or @height]'): style = self.style(elem) # Check if either height or width is not default is_styled = style._style.get('width', 'auto') != 'auto' or \ style._style.get('height', 'auto') != 'auto' if not is_styled: # Update img style dimension using width and height upd = {} for prop in ('width', 'height'): val = elem.get(prop, '').strip() try: del elem.attrib[prop] except: pass if val: if num_pat.match(val) is not None: val += 'px' upd[prop] = val if upd: style._update_cssdict(upd)
def epubify_markup(self, root, log): from calibre.ebooks.oeb.base import XPath, XHTML # Fix empty title tags for t in XPath('//h:title')(root): if not t.text: t.text = u' ' # Fix <p><div> constructs as the asinine epubchecker complains # about them pdiv = XPath('//h:p/h:div') for div in pdiv(root): div.getparent().tag = XHTML('div') # Remove the position:relative as it causes problems with some epub # renderers. Remove display: block on an image inside a div as it is # redundant and prevents text-align:center from working in ADE # Also ensure that the img is contained in its containing div imgpath = XPath('//h:div/h:img[@style]') for img in imgpath(root): div = img.getparent() if len(div) == 1: style = div.attrib.get('style', '') if style and not style.endswith(';'): style = style + ';' style += 'position:static' # Ensures position of containing div is static # Ensure that the img is always contained in its frame div.attrib['style'] = style img.attrib['style'] = 'max-width: 100%; max-height: 100%' # Handle anchored images. The default markup + CSS produced by # odf2xhtml works with WebKit but not with ADE. So we convert the # common cases of left/right/center aligned block images to work on # both webkit and ADE. We detect the case of setting the side margins # to auto and map it to an appropriate text-align directive, which # works in both WebKit and ADE. # https://bugs.launchpad.net/bugs/1063207 # https://bugs.launchpad.net/calibre/+bug/859343 imgpath = XPath('descendant::h:div/h:div/h:img') for img in imgpath(root): div2 = img.getparent() div1 = div2.getparent() if (len(div1), len(div2)) != (1, 1): continue cls = div1.get('class', '') first_rules = filter( None, [self.get_css_for_class(x) for x in cls.split()]) has_align = False for r in first_rules: if r.style.getProperty(u'text-align') is not None: has_align = True ml = mr = None if not has_align: aval = None cls = div2.get(u'class', u'') rules = filter( None, [self.get_css_for_class(x) for x in cls.split()]) for r in rules: ml = r.style.getPropertyCSSValue(u'margin-left') or ml mr = r.style.getPropertyCSSValue(u'margin-right') or mr ml = getattr(ml, 'value', None) mr = getattr(mr, 'value', None) if ml == mr == u'auto': aval = u'center' elif ml == u'auto' and mr != u'auto': aval = 'right' elif ml != u'auto' and mr == u'auto': aval = 'left' if aval is not None: style = div1.attrib.get('style', '').strip() if style and not style.endswith(';'): style = style + ';' style += 'text-align:%s' % aval has_align = True div1.attrib['style'] = style if has_align: # This is needed for ADE, without it the text-align has no # effect style = div2.attrib['style'] div2.attrib['style'] = 'display:inline;' + style
def mobimlize_content(self, tag, text, bstate, istates): 'Convert text content' if text or tag != 'br': bstate.content = True istate = istates[-1] para = bstate.para if tag in SPECIAL_TAGS and not text: para = para if para is not None else bstate.body elif para is None or tag in ('td', 'th'): body = bstate.body if bstate.pbreak: etree.SubElement(body, MBP('pagebreak')) bstate.pbreak = False bstate.istate = None bstate.anchor = None parent = bstate.nested[-1] if bstate.nested else bstate.body indent = istate.indent left = istate.left if isinstance(indent, basestring): indent = 0 if indent < 0 and abs(indent) < left: left += indent indent = 0 elif indent != 0 and abs(indent) < self.profile.fbase: indent = (indent / abs(indent)) * self.profile.fbase if tag in NESTABLE_TAGS and not istate.rendered: para = wrapper = etree.SubElement(parent, XHTML(tag), attrib=istate.attrib) bstate.nested.append(para) if tag == 'li' and len(istates) > 1: istates[-2].list_num += 1 para.attrib['value'] = str(istates[-2].list_num) elif tag in NESTABLE_TAGS and istate.rendered: para = wrapper = bstate.nested[-1] elif not self.opts.mobi_ignore_margins and left > 0 and indent >= 0: ems = self.profile.mobi_ems_per_blockquote para = wrapper = etree.SubElement(parent, XHTML('blockquote')) para = wrapper emleft = int(round(left / self.profile.fbase)) - ems emleft = min((emleft, 10)) while emleft > ems / 2.0: para = etree.SubElement(para, XHTML('blockquote')) emleft -= ems else: para = wrapper = etree.SubElement(parent, XHTML('p')) bstate.inline = bstate.para = para vspace = bstate.vpadding + bstate.vmargin bstate.vpadding = bstate.vmargin = 0 if tag not in TABLE_TAGS: if tag in ('ul', 'ol') and vspace > 0: wrapper.addprevious( etree.Element(XHTML('div'), height=self.mobimlize_measure(vspace))) else: wrapper.attrib['height'] = self.mobimlize_measure(vspace) para.attrib['width'] = self.mobimlize_measure(indent) elif tag == 'table' and vspace > 0: vspace = int(round(vspace / self.profile.fbase)) while vspace > 0: wrapper.addprevious(etree.Element(XHTML('br'))) vspace -= 1 if istate.halign != 'auto' and isinstance(istate.halign, (str, unicode)): para.attrib['align'] = istate.halign istate.rendered = True pstate = bstate.istate if tag in CONTENT_TAGS: bstate.inline = para pstate = bstate.istate = None try: etree.SubElement(para, XHTML(tag), attrib=istate.attrib) except: print 'Invalid subelement:', para, tag, istate.attrib raise elif tag in TABLE_TAGS: para.attrib['valign'] = 'top' if istate.ids: for id_ in istate.ids: anchor = etree.Element(XHTML('a'), attrib={'id': id_}) if tag == 'li': try: last = bstate.body[-1][-1] except: break last.insert(0, anchor) anchor.tail = last.text last.text = None else: last = bstate.body[-1] # We use append instead of addprevious so that inline # anchors in large blocks point to the correct place. See # https://bugs.launchpad.net/calibre/+bug/899831 # This could potentially break if inserting an anchor at # this point in the markup is illegal, but I cannot think # of such a case offhand. if barename(last.tag) in LEAF_TAGS: last.addprevious(anchor) else: last.append(anchor) istate.ids.clear() if not text: return if not pstate or istate != pstate: inline = para fsize = istate.fsize href = istate.href if not href: bstate.anchor = None elif pstate and pstate.href == href: inline = bstate.anchor else: inline = etree.SubElement(inline, XHTML('a'), href=href) bstate.anchor = inline if fsize != 3: inline = etree.SubElement(inline, XHTML('font'), size=str(fsize)) if istate.family == 'monospace': inline = etree.SubElement(inline, XHTML('tt')) if istate.italic: inline = etree.SubElement(inline, XHTML('i')) if istate.bold: inline = etree.SubElement(inline, XHTML('b')) if istate.bgcolor is not None and istate.bgcolor != 'transparent': inline = etree.SubElement(inline, XHTML('span'), bgcolor=istate.bgcolor) if istate.fgcolor != 'black': inline = etree.SubElement(inline, XHTML('font'), color=unicode(istate.fgcolor)) if istate.strikethrough: inline = etree.SubElement(inline, XHTML('s')) if istate.underline: inline = etree.SubElement(inline, XHTML('u')) bstate.inline = inline bstate.istate = istate inline = bstate.inline content = self.preize_text(text) if istate.preserve else [text] for item in content: if isinstance(item, basestring): if len(inline) == 0: inline.text = (inline.text or '') + item else: last = inline[-1] last.tail = (last.tail or '') + item else: inline.append(item)
def workaround_ade_quirks(self): # {{{ ''' Perform various markup transforms to get the output to render correctly in the quirky ADE. ''' from calibre.ebooks.oeb.base import XPath, XHTML, barename, urlunquote stylesheet = self.oeb.manifest.main_stylesheet # ADE cries big wet tears when it encounters an invalid fragment # identifier in the NCX toc. frag_pat = re.compile(r'[-A-Za-z0-9_:.]+$') for node in self.oeb.toc.iter(): href = getattr(node, 'href', None) if hasattr(href, 'partition'): base, _, frag = href.partition('#') frag = urlunquote(frag) if frag and frag_pat.match(frag) is None: self.log.warn( 'Removing fragment identifier %r from TOC as Adobe Digital Editions cannot handle it' % frag) node.href = base for x in self.oeb.spine: root = x.data body = XPath('//h:body')(root) if body: body = body[0] if hasattr(body, 'xpath'): # remove <img> tags with empty src elements bad = [] for x in XPath('//h:img')(body): src = x.get('src', '').strip() if src in ('', '#') or src.startswith('http:'): bad.append(x) for img in bad: img.getparent().remove(img) # Add id attribute to <a> tags that have name for x in XPath('//h:a[@name]')(body): if not x.get('id', False): x.set('id', x.get('name')) # The delightful epubcheck has started complaining about <a> tags that # have name attributes. x.attrib.pop('name') # Replace <br> that are children of <body> as ADE doesn't handle them for br in XPath('./h:br')(body): if br.getparent() is None: continue try: prior = next(br.itersiblings(preceding=True)) priortag = barename(prior.tag) priortext = prior.tail except: priortag = 'body' priortext = body.text if priortext: priortext = priortext.strip() br.tag = XHTML('p') br.text = '\u00a0' style = br.get('style', '').split(';') style = list(filter(None, map(lambda x: x.strip(), style))) style.append('margin:0pt; border:0pt') # If the prior tag is a block (including a <br> we replaced) # then this <br> replacement should have a 1-line height. # Otherwise it should have no height. if not priortext and priortag in block_level_tags: style.append('height:1em') else: style.append('height:0pt') br.set('style', '; '.join(style)) for tag in XPath('//h:embed')(root): tag.getparent().remove(tag) for tag in XPath('//h:object')(root): if tag.get('type', '').lower().strip() in { 'image/svg+xml', 'application/svg+xml' }: continue tag.getparent().remove(tag) for tag in XPath('//h:title|//h:style')(root): if not tag.text: tag.getparent().remove(tag) for tag in XPath('//h:script')(root): if (not tag.text and not tag.get('src', False) and tag.get('type', None) != 'text/x-mathjax-config'): tag.getparent().remove(tag) for tag in XPath('//h:body/descendant::h:script')(root): tag.getparent().remove(tag) formchildren = XPath('./h:input|./h:button|./h:textarea|' './h:label|./h:fieldset|./h:legend') for tag in XPath('//h:form')(root): if formchildren(tag): tag.getparent().remove(tag) else: # Not a real form tag.tag = XHTML('div') for tag in XPath('//h:center')(root): tag.tag = XHTML('div') tag.set('style', 'text-align:center') # ADE can't handle & in an img url for tag in XPath('//h:img[@src]')(root): tag.set('src', tag.get('src', '').replace('&', '')) # ADE whimpers in fright when it encounters a <td> outside a # <table> in_table = XPath('ancestor::h:table') for tag in XPath('//h:td|//h:tr|//h:th')(root): if not in_table(tag): tag.tag = XHTML('div') # ADE fails to render non breaking hyphens/soft hyphens/zero width spaces special_chars = re.compile('[\u200b\u00ad]') for elem in root.iterdescendants('*'): if elem.text: elem.text = special_chars.sub('', elem.text) elem.text = elem.text.replace('\u2011', '-') if elem.tail: elem.tail = special_chars.sub('', elem.tail) elem.tail = elem.tail.replace('\u2011', '-') if stylesheet is not None: # ADE doesn't render lists correctly if they have left margins from css_parser.css import CSSRule for lb in XPath('//h:ul[@class]|//h:ol[@class]')(root): sel = '.' + lb.get('class') for rule in stylesheet.data.cssRules.rulesOfType( CSSRule.STYLE_RULE): if sel == rule.selectorList.selectorText: rule.style.removeProperty('margin-left') # padding-left breaks rendering in webkit and gecko rule.style.removeProperty('padding-left') # Change whitespace:pre to pre-wrap to accommodate readers that # cannot scroll horizontally for rule in stylesheet.data.cssRules.rulesOfType( CSSRule.STYLE_RULE): style = rule.style ws = style.getPropertyValue('white-space') if ws == 'pre': style.setProperty('white-space', 'pre-wrap')
def process_nav_node(node, toc_parent): for li in node.iterchildren(XHTML('li')): child = add_from_li(li, toc_parent) ol = first_child(li, XHTML('ol')) if child is not None and ol is not None: process_nav_node(ol, child)
def render_jacket(mi, output_profile, alt_title=_('Unknown'), alt_tags=[], alt_comments='', alt_publisher=(''), rescale_fonts=False): css = P('jacket/stylesheet.css', data=True).decode('utf-8') template = P('jacket/template.xhtml', data=True).decode('utf-8') template = re.sub(r'<!--.*?-->', '', template, flags=re.DOTALL) css = re.sub(r'/\*.*?\*/', '', css, flags=re.DOTALL) try: title_str = mi.title if mi.title else alt_title except: title_str = _('Unknown') title_str = escape(title_str) title = '<span class="title">%s</span>' % title_str series = Series(mi.series, mi.series_index) try: publisher = mi.publisher if mi.publisher else alt_publisher except: publisher = '' publisher = escape(publisher) try: if is_date_undefined(mi.pubdate): pubdate = '' else: pubdate = strftime('%Y', mi.pubdate.timetuple()) except: pubdate = '' rating = get_rating(mi.rating, output_profile.ratings_char, output_profile.empty_ratings_char) tags = Tags((mi.tags if mi.tags else alt_tags), output_profile) comments = mi.comments if mi.comments else alt_comments comments = comments.strip() orig_comments = comments if comments: comments = comments_to_html(comments) try: author = mi.format_authors() except: author = '' author = escape(author) def generate_html(comments): args = dict( xmlns=XHTML_NS, title_str=title_str, css=css, title=title, author=author, publisher=publisher, pubdate_label=_('Published'), pubdate=pubdate, series_label=_('Series'), series=series, rating_label=_('Rating'), rating=rating, tags_label=_('Tags'), tags=tags, comments=comments, footer='', searchable_tags=' '.join( escape(t) + 'ttt' for t in tags.tags_list), ) for key in mi.custom_field_keys(): m = mi.get_user_metadata(key, False) or {} try: display_name, val = mi.format_field_extended(key)[:2] dkey = key.replace('#', '_') dt = m.get('datatype') if dt == 'series': args[dkey] = Series(mi.get(key), mi.get(key + '_index')) elif dt == 'rating': args[dkey] = rating_to_stars( mi.get(key), m.get('display', {}).get('allow_half_stars', False)) else: args[dkey] = escape(val) args[dkey + '_label'] = escape(display_name) except Exception: # if the val (custom column contents) is None, don't add to args pass if False: print("Custom column values available in jacket template:") for key in list(args.keys()): if key.startswith('_') and not key.endswith('_label'): print((" %s: %s" % ('#' + key[1:], args[key]))) # Used in the comment describing use of custom columns in templates # Don't change this unless you also change it in template.xhtml args['_genre_label'] = args.get('_genre_label', '{_genre_label}') args['_genre'] = args.get('_genre', '{_genre}') formatter = SafeFormatter() generated_html = formatter.format(template, **args) # Post-process the generated html to strip out empty header items soup = BeautifulSoup(generated_html) if not series: series_tag = soup.find(attrs={'class': 'cbj_series'}) if series_tag is not None: series_tag.extract() if not rating: rating_tag = soup.find(attrs={'class': 'cbj_rating'}) if rating_tag is not None: rating_tag.extract() if not tags: tags_tag = soup.find(attrs={'class': 'cbj_tags'}) if tags_tag is not None: tags_tag.extract() if not pubdate: pubdate_tag = soup.find(attrs={'class': 'cbj_pubdata'}) if pubdate_tag is not None: pubdate_tag.extract() if output_profile.short_name != 'kindle': hr_tag = soup.find('hr', attrs={'class': 'cbj_kindle_banner_hr'}) if hr_tag is not None: hr_tag.extract() return strip_encoding_declarations( soup.renderContents('utf-8').decode('utf-8')) from calibre.ebooks.oeb.base import RECOVER_PARSER try: root = etree.fromstring(generate_html(comments), parser=RECOVER_PARSER) except: try: root = etree.fromstring(generate_html(escape(orig_comments)), parser=RECOVER_PARSER) except: root = etree.fromstring(generate_html(''), parser=RECOVER_PARSER) if rescale_fonts: # We ensure that the conversion pipeline will set the font sizes for # text in the jacket to the same size as the font sizes for the rest of # the text in the book. That means that as long as the jacket uses # relative font sizes (em or %), the post conversion font size will be # the same as for text in the main book. So text with size x em will # be rescaled to the same value in both the jacket and the main content. # # We cannot use calibre_rescale_100 on the body tag as that will just # give the body tag a font size of 1em, which is useless. for body in root.xpath('//*[local-name()="body"]'): fw = body.makeelement(XHTML('div')) fw.set('class', 'calibre_rescale_100') for child in body: fw.append(child) body.append(fw) from calibre.ebooks.oeb.polish.pretty import pretty_html_tree pretty_html_tree(None, root) return root
def resolve_styles(container, name, select=None, sheet_callback=None): root = container.parsed(name) select = select or Select(root, ignore_inappropriate_pseudo_classes=True) style_map = defaultdict(list) pseudo_style_map = defaultdict(list) rule_index_counter = count() pseudo_pat = re.compile( u':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I) def process_sheet(sheet, sheet_name): if sheet_callback is not None: sheet_callback(sheet, sheet_name) for rule, sheet_name, rule_index in iterrules( container, sheet_name, rules=sheet, rule_index_counter=rule_index_counter, rule_type='STYLE_RULE'): for selector in rule.selectorList: text = selector.selectorText try: matches = tuple(select(text)) except SelectorError as err: container.log.error( 'Ignoring CSS rule with invalid selector: %r (%s)' % (text, as_unicode(err))) continue m = pseudo_pat.search(text) style = normalize_style_declaration(rule.style, sheet_name) if m is None: for elem in matches: style_map[elem].append( StyleDeclaration(specificity(rule_index, selector), style, None)) else: for elem in matches: pseudo_style_map[elem].append( StyleDeclaration(specificity(rule_index, selector), style, m.group(1))) process_sheet(html_css_stylesheet(container), 'user-agent.css') for elem in root.iterdescendants(XHTML('style'), XHTML('link')): if elem.tag.lower().endswith('style'): if not elem.text: continue sheet = container.parse_css(elem.text) sheet_name = name else: if (elem.get('type') or 'text/css').lower() not in OEB_STYLES or \ (elem.get('rel') or 'stylesheet').lower() != 'stylesheet' or \ not media_ok(elem.get('media')): continue href = elem.get('href') if not href: continue sheet_name = container.href_to_name(href, name) if not container.has_name(sheet_name): continue sheet = container.parsed(sheet_name) if not isinstance(sheet, CSSStyleSheet): continue process_sheet(sheet, sheet_name) for elem in root.xpath('//*[@style]'): text = elem.get('style') if text: style = container.parse_css(text, is_declaration=True) style_map[elem].append( StyleDeclaration(Specificity(1, 0, 0, 0, 0), normalize_style_declaration(style, name), None)) for l in (style_map, pseudo_style_map): for x in l.itervalues(): x.sort(key=itemgetter(0), reverse=True) style_map = { elem: resolve_declarations(x) for elem, x in style_map.iteritems() } pseudo_style_map = { elem: resolve_pseudo_declarations(x) for elem, x in pseudo_style_map.iteritems() } return partial(resolve_property, style_map), partial(resolve_pseudo_property, style_map, pseudo_style_map), select
def embed_all_fonts(container, stats, report): all_font_rules = tuple(itervalues(stats.all_font_rules)) warned = set() rules, nrules = [], {} modified = set() for path in container.spine_items: name = container.abspath_to_name(path) fu = stats.font_usage_map.get(name, None) fs = stats.font_spec_map.get(name, None) fr = stats.font_rule_map.get(name, None) if None in (fs, fu, fr): continue fs = {icu_lower(x) for x in fs} for font in itervalues(fu): if icu_lower(font['font-family']) not in fs: continue rule = matching_rule(font, fr) if rule is None: # This font was not already embedded in this HTML file, before # processing started key = font_key(font) rule = nrules.get(key) if rule is None: rule = embed_font(container, font, all_font_rules, report, warned) if rule is not None: rules.append(rule) nrules[key] = rule modified.add(name) stats.font_stats[rule['name']] = font['text'] else: # This font was previously embedded by this code, update its stats stats.font_stats[rule['name']] |= font['text'] modified.add(name) if not rules: report(_('No embeddable fonts found')) return False # Write out CSS rules = [ ';\n\t'.join( '{}: {}'.format(k, '"%s"' % v if k == 'font-family' else v) for k, v in iteritems(rulel) if (k in props and props[k] != v and v != '400') or k == 'src') for rulel in rules ] css = '\n\n'.join(['@font-face {\n\t%s\n}' % r for r in rules]) item = container.generate_item('fonts.css', id_prefix='font_embed') name = container.href_to_name(item.get('href'), container.opf_name) with container.open(name, 'wb') as out: out.write(css.encode('utf-8')) # Add link to CSS in all files that need it for spine_name in modified: root = container.parsed(spine_name) head = root.xpath('//*[local-name()="head"][1]')[0] href = container.name_to_href(name, spine_name) etree.SubElement(head, XHTML('link'), rel='stylesheet', type='text/css', href=href).tail = '\n' container.dirty(spine_name) return True
def __init__(self, tree, path, oeb, opts, profile=None, extra_css='', user_css=''): self.oeb, self.opts = oeb, opts self.profile = profile if self.profile is None: # Use the default profile. This should really be using # opts.output_profile, but I don't want to risk changing it, as # doing so might well have hard to debug font size effects. from calibre.customize.ui import output_profiles for x in output_profiles(): if x.short_name == 'default': self.profile = x break if self.profile is None: # Just in case the default profile is removed in the future :) self.profile = opts.output_profile self.logger = oeb.logger item = oeb.manifest.hrefs[path] basename = os.path.basename(path) cssname = os.path.splitext(basename)[0] + '.css' stylesheets = [html_css_stylesheet()] head = xpath(tree, '/h:html/h:head') if head: head = head[0] else: head = [] # Add cssutils parsing profiles from output_profile for profile in self.opts.output_profile.extra_css_modules: cssprofiles.addProfile(profile['name'], profile['props'], profile['macros']) parser = CSSParser(fetcher=self._fetch_css_file, log=logging.getLogger('calibre.css')) self.font_face_rules = [] for elem in head: if (elem.tag == XHTML('style') and elem.get('type', CSS_MIME) in OEB_STYLES): text = elem.text if elem.text else u'' for x in elem: t = getattr(x, 'text', None) if t: text += u'\n\n' + force_unicode(t, u'utf-8') t = getattr(x, 'tail', None) if t: text += u'\n\n' + force_unicode(t, u'utf-8') if text: text = XHTML_CSS_NAMESPACE + text text = oeb.css_preprocessor(text) stylesheet = parser.parseString(text, href=cssname, validate=False) stylesheet.namespaces['h'] = XHTML_NS stylesheets.append(stylesheet) # Make links to resources absolute, since these rules will # be folded into a stylesheet at the root replaceUrls(stylesheet, item.abshref, ignoreImportRules=True) elif elem.tag == XHTML('link') and elem.get('href') \ and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \ and elem.get('type', CSS_MIME).lower() in OEB_STYLES: href = urlnormalize(elem.attrib['href']) path = item.abshref(href) sitem = oeb.manifest.hrefs.get(path, None) if sitem is None: self.logger.warn( 'Stylesheet %r referenced by file %r not in manifest' % (path, item.href)) continue if not hasattr(sitem.data, 'cssRules'): self.logger.warn( 'Stylesheet %r referenced by file %r is not CSS' % (path, item.href)) continue stylesheets.append(sitem.data) csses = {'extra_css': extra_css, 'user_css': user_css} for w, x in csses.items(): if x: try: text = XHTML_CSS_NAMESPACE + x stylesheet = parser.parseString(text, href=cssname, validate=False) stylesheet.namespaces['h'] = XHTML_NS stylesheets.append(stylesheet) except: self.logger.exception('Failed to parse %s, ignoring.' % w) self.logger.debug('Bad css: ') self.logger.debug(x) rules = [] index = 0 self.stylesheets = set() self.page_rule = {} for stylesheet in stylesheets: href = stylesheet.href self.stylesheets.add(href) for rule in stylesheet.cssRules: rules.extend(self.flatten_rule(rule, href, index)) index = index + 1 rules.sort() self.rules = rules self._styles = {} for _, _, cssdict, text, _ in rules: fl = ':first-letter' in text if fl: text = text.replace(':first-letter', '') selector = get_css_selector(text) matches = selector(tree, self.logger) if fl: from lxml.builder import ElementMaker E = ElementMaker(namespace=XHTML_NS) for elem in matches: for x in elem.iter(): if x.text: punctuation_chars = [] text = unicode(x.text) while text: if not unicodedata.category( text[0]).startswith('P'): break punctuation_chars.append(text[0]) text = text[1:] special_text = u''.join(punctuation_chars) + \ (text[0] if text else u'') span = E.span(special_text) span.tail = text[1:] x.text = None x.insert(0, span) self.style(span)._update_cssdict(cssdict) break else: for elem in matches: self.style(elem)._update_cssdict(cssdict) for elem in xpath(tree, '//h:*[@style]'): self.style(elem)._apply_style_attr(url_replacer=item.abshref) num_pat = re.compile(r'\d+$') for elem in xpath(tree, '//h:img[@width or @height]'): style = self.style(elem) # Check if either height or width is not default is_styled = style._style.get('width', 'auto') != 'auto' or \ style._style.get('height', 'auto') != 'auto' if not is_styled: # Update img style dimension using width and height upd = {} for prop in ('width', 'height'): val = elem.get(prop, '').strip() try: del elem.attrib[prop] except: pass if val: if num_pat.match(val) is not None: val += 'px' upd[prop] = val if upd: style._update_cssdict(upd)
def transform_html(self, name, virtualize_resources): style_xpath = XPath('//h:style') link_xpath = XPath('//h:a[@href]') img_xpath = XPath('//h:img[@src]') res_link_xpath = XPath('//h:link[@href]') root = self.parsed(name) head = ensure_head(root) changed = False for style in style_xpath(root): # Firefox flakes out sometimes when dynamically creating <style> tags, # so convert them to external stylesheets to ensure they never fail if style.text and (style.get('type') or 'text/css').lower() == 'text/css': in_head = has_ancestor(style, head) if not in_head: extract(style) head.append(style) css = style.text style.clear() style.tag = XHTML('link') style.set('type', 'text/css') style.set('rel', 'stylesheet') sname = self.add_file(name + '.css', css.encode('utf-8'), modify_name_if_needed=True) style.set('href', self.name_to_href(sname, name)) changed = True # Used for viewing images for img in img_xpath(root): img_name = self.href_to_name(img.get('src'), name) if img_name: img.set('data-calibre-src', img_name) changed = True # Disable non stylsheet link tags. This link will not be loaded by the # browser anyway and will causes the resource load check to hang for link in res_link_xpath(root): ltype = (link.get('type') or 'text/css').lower() rel = (link.get('rel') or 'stylesheet').lower() if ltype != 'text/css' or rel != 'stylesheet': link.attrib.clear() changed = True # Transform <style> and style="" if transform_inline_styles(self, name, transform_sheet=transform_sheet, transform_style=transform_declaration): changed = True if not virtualize_resources: link_uid = self.book_render_data['link_uid'] link_replacer = create_link_replacer(self, link_uid, set()) ltm = self.book_render_data['link_to_map'] for a in link_xpath(root): href = link_replacer(name, a.get('href')) if href and href.startswith(link_uid): a.set('href', 'javascript:void(0)') parts = decode_url(href.split('|')[1]) lname, lfrag = parts[0], parts[1] ltm.setdefault(lname, {}).setdefault(lfrag or '', set()).add(name) a.set( 'data-' + link_uid, json.dumps({ 'name': lname, 'frag': lfrag }, ensure_ascii=False)) changed = True if changed: self.dirty(name)
class RecodeCallbackBase: class __metaclass__(type): def __init__(cls, name, bases, attrs): type.__init__(cls, name, bases, attrs) RecodeCallbackRegistry.register(cls) # NOTE: body is our first document structure tag, so it is safe to use # it as a callback base example tag = XHTML('body') def __init__(self, converter, logger): self.converter = converter self.log = logger # reference counter used for tracking tags nesting self.refcount = 0 # internal stack for data stashing self.datastack = [] def start(self, element): """Element recoding entry method.""" return self.get_begin(element) + self.get_text(element) def stop(self, element): """Element recoding exit method.""" return self.get_end(element) + self.get_tail(element) def push(self, data): """Push data onto the internal stack storage.""" self.datastack.append(data) def pop(self): """Pop data from the internal stack storage.""" return self.datastack.pop() @classmethod def get_begin(cls, element): return "" @classmethod def get_end(cls, element): return "" @classmethod def get_text(cls, element): return cls.sanitize(element.text or "") @classmethod def get_tail(cls, element): return cls.sanitize(element.tail or "") @staticmethod def sanitize(text): """ Return the given text with sanitized TeX special characters. There is ten characters which have special meaning in the TeX document, plus a newline which controls document structure. The list of all of them: &, %, $, #, _, {, }, ~, ^, backslash and newline """ text = re.sub(r'\\', r'\\textbackslash{}', text) text = re.sub(r'\^', r'\\textasciicircum{}', text) text = re.sub(r'~', r'\\textasciitilde{}', text) text = re.sub(r'[&%$#_{}]', r'\\\g<0>', text) return re.sub(r'[\r\n]+', r' ', text) @staticmethod def get_classes(element): """Get classes set attached to the given element.""" return set(element.attrib.get('class', "").split()) @staticmethod def get_class_style(classes): """ Get style functions. In the OEB file format, style is encoded in the class attribute. That method modifies input argument - used classes are removed. """ functions = [] for cls in classes: if cls == 'bold': functions.append("\\textbf{") elif cls == 'italic': functions.append("\\emph{") elif cls == 'underline': functions.append("\\uline{") # remove used (recognized) classes classes.difference_update(( 'bold', 'italic', 'underline', )) return functions @staticmethod def get_class_layout(classes): """ Get layout functions. In the OEB file format layout may be encoded in the class attribute, e.g. page-break. That method modifies input argument - used classes are removed. """ functions = [] for cls in classes: if cls == 'mbppagebreak': functions.append("\\chapter*{") # remove used (recognized) classes classes.difference_update(('mbppagebreak', )) return functions
def extract_css_into_flows(self): inlines = defaultdict(list) # Ensure identical <style>s not repeated sheets = {} for item in self.oeb.manifest: if item.media_type in OEB_STYLES: sheet = self.data(item) if not self.opts.expand_css and hasattr(item.data, 'cssText'): condense_sheet(sheet) sheets[item.href] = len(self.flows) self.flows.append(sheet) def fix_import_rules(sheet): changed = False for rule in sheet.cssRules.rulesOfType(CSSRule.IMPORT_RULE): if rule.href: href = item.abshref(rule.href) idx = sheets.get(href, None) if idx is not None: idx = to_ref(idx) rule.href = 'kindle:flow:%s?mime=text/css' % idx changed = True return changed for item in self.oeb.spine: root = self.data(item) for link in XPath('//h:link[@href]')(root): href = item.abshref(link.get('href')) idx = sheets.get(href, None) if idx is not None: idx = to_ref(idx) link.set('href', 'kindle:flow:%s?mime=text/css' % idx) for tag in XPath('//h:style')(root): p = tag.getparent() idx = p.index(tag) raw = tag.text if not raw or not raw.strip(): extract(tag) continue sheet = cssutils.parseString(raw, validate=False) if fix_import_rules(sheet): raw = force_unicode(sheet.cssText, 'utf-8') repl = etree.Element(XHTML('link'), type='text/css', rel='stylesheet') repl.tail = '\n' p.insert(idx, repl) extract(tag) inlines[raw].append(repl) for raw, elems in inlines.iteritems(): idx = to_ref(len(self.flows)) self.flows.append(raw) for link in elems: link.set('href', 'kindle:flow:%s?mime=text/css' % idx) for item in self.oeb.manifest: if item.media_type in OEB_STYLES: sheet = self.data(item) if hasattr(sheet, 'cssRules'): fix_import_rules(sheet) for i, sheet in enumerate(tuple(self.flows)): if hasattr(sheet, 'cssText'): self.flows[i] = force_unicode(sheet.cssText, 'utf-8')
def read_inline_toc(self, href, frag): ans = TOC() base_href = '/'.join(href.split('/')[:-1]) with open(href.replace('/', os.sep), 'rb') as f: raw = f.read().decode(self.header.codec) root = parse_html(raw, log=self.log) body = XPath('//h:body')(root) reached = False if body: start = body[0] else: start = None reached = True if frag: elems = XPath('//*[@id="%s"]'%frag)(root) if elems: start = elems[0] def node_depth(elem): ans = 0 parent = elem.getparent() while parent is not None: parent = parent.getparent() ans += 1 return ans # Layer the ToC based on nesting order in the source HTML current_depth = None parent = ans seen = set() links = [] for elem in root.iterdescendants(etree.Element): if reached and elem.tag == XHTML('a') and elem.get('href', False): href = elem.get('href') href, frag = urldefrag(href) href = base_href + '/' + href text = xml2text(elem).strip() if (text, href, frag) in seen: continue seen.add((text, href, frag)) links.append((text, href, frag, node_depth(elem))) elif elem is start: reached = True depths = sorted(set(x[-1] for x in links)) depth_map = {x:i for i, x in enumerate(depths)} for text, href, frag, depth in links: depth = depth_map[depth] if current_depth is None: current_depth = 0 parent.add_item(href, frag, text) elif current_depth == depth: parent.add_item(href, frag, text) elif current_depth < depth: parent = parent[-1] if len(parent) > 0 else parent parent.add_item(href, frag, text) current_depth += 1 else: delta = current_depth - depth while delta > 0 and parent.parent is not None: parent = parent.parent delta -= 1 parent.add_item(href, frag, text) current_depth = depth return ans
from lxml import etree from PyQt4.QtCore import Qt from PyQt4.QtCore import QByteArray from PyQt4.QtCore import QBuffer from PyQt4.QtCore import QIODevice from PyQt4.QtGui import QColor from PyQt4.QtGui import QImage from PyQt4.QtGui import QPainter from PyQt4.QtSvg import QSvgRenderer from calibre.ebooks.oeb.base import XHTML, XLINK from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME from calibre.ebooks.oeb.base import xml2str, xpath from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks.oeb.stylizer import Stylizer IMAGE_TAGS = set([XHTML('img'), XHTML('object')]) KEEP_ATTRS = set(['class', 'style', 'width', 'height', 'align']) class Unavailable(Exception): pass class SVGRasterizer(object): def __init__(self): from calibre.gui2 import is_ok_to_use_qt if not is_ok_to_use_qt(): raise Unavailable('Not OK to use Qt') @classmethod def config(cls, cfg):
def a(anchor): div.append(div.makeelement(XHTML('a'), href='#' + anchor)) div[-1].text = '\xa0' div[-1].tail = ' '
import os, re from PyQt5.Qt import ( Qt, QByteArray, QBuffer, QIODevice, QColor, QImage, QPainter, QSvgRenderer) from calibre.ebooks.oeb.base import XHTML, XLINK from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME from calibre.ebooks.oeb.base import xml2str, xpath from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ptempfile import PersistentTemporaryFile from calibre.utils.imghdr import what from polyglot.builtins import unicode_type from polyglot.urllib import urldefrag IMAGE_TAGS = {XHTML('img'), XHTML('object')} KEEP_ATTRS = {'class', 'style', 'width', 'height', 'align'} class Unavailable(Exception): pass class SVGRasterizer(object): def __init__(self, base_css=''): self.base_css = base_css from calibre.gui2 import must_use_qt must_use_qt() @classmethod
def convert_epub3_nav(self, nav_path, opf, log): from lxml import etree from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.oeb.polish.parsing import parse from calibre.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX from calibre.ebooks.oeb.polish.toc import first_child from tempfile import NamedTemporaryFile with lopen(nav_path, 'rb') as f: raw = f.read() raw = xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0] root = parse(raw, log=log) ncx = etree.fromstring( '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="eng"><navMap/></ncx>' ) navmap = ncx[0] et = '{%s}type' % EPUB_NS bn = os.path.basename(nav_path) def add_from_li(li, parent): href = text = None for x in li.iterchildren(XHTML('a'), XHTML('span')): text = etree.tostring( x, method='text', encoding=unicode, with_tail=False).strip() or ' '.join( x.xpath('descendant-or-self::*/@title')).strip() href = x.get('href') if href: if href.startswith('#'): href = bn + href break np = parent.makeelement(NCX('navPoint')) parent.append(np) np.append(np.makeelement(NCX('navLabel'))) np[0].append(np.makeelement(NCX('text'))) np[0][0].text = text if href: np.append(np.makeelement(NCX('content'), attrib={'src': href})) return np def process_nav_node(node, toc_parent): for li in node.iterchildren(XHTML('li')): child = add_from_li(li, toc_parent) ol = first_child(li, XHTML('ol')) if child is not None and ol is not None: process_nav_node(ol, child) for nav in root.iterdescendants(XHTML('nav')): if nav.get(et) == 'toc': ol = first_child(nav, XHTML('ol')) if ol is not None: process_nav_node(ol, navmap) break else: return with NamedTemporaryFile(suffix='.ncx', dir=os.path.dirname(nav_path), delete=False) as f: f.write(etree.tostring(ncx, encoding='utf-8')) ncx_id = opf.add_path_to_manifest(f.name, NCX_MIME) for spine in opf.root.xpath('//*[local-name()="spine"]'): spine.set('toc', ncx_id)
def convert_epub3_nav(self, nav_path, opf, log, opts): from lxml import etree from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.oeb.polish.parsing import parse from calibre.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX, urlnormalize, urlunquote, serialize from calibre.ebooks.oeb.polish.toc import first_child from tempfile import NamedTemporaryFile with lopen(nav_path, 'rb') as f: raw = f.read() raw = xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0] root = parse(raw, log=log) ncx = etree.fromstring( '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="eng"><navMap/></ncx>' ) navmap = ncx[0] et = '{%s}type' % EPUB_NS bn = os.path.basename(nav_path) def add_from_li(li, parent): href = text = None for x in li.iterchildren(XHTML('a'), XHTML('span')): text = etree.tostring( x, method='text', encoding=unicode_type, with_tail=False).strip() or ' '.join( x.xpath('descendant-or-self::*/@title')).strip() href = x.get('href') if href: if href.startswith('#'): href = bn + href break np = parent.makeelement(NCX('navPoint')) parent.append(np) np.append(np.makeelement(NCX('navLabel'))) np[0].append(np.makeelement(NCX('text'))) np[0][0].text = text if href: np.append(np.makeelement(NCX('content'), attrib={'src': href})) return np def process_nav_node(node, toc_parent): for li in node.iterchildren(XHTML('li')): child = add_from_li(li, toc_parent) ol = first_child(li, XHTML('ol')) if child is not None and ol is not None: process_nav_node(ol, child) for nav in root.iterdescendants(XHTML('nav')): if nav.get(et) == 'toc': ol = first_child(nav, XHTML('ol')) if ol is not None: process_nav_node(ol, navmap) break else: return with NamedTemporaryFile(suffix='.ncx', dir=os.path.dirname(nav_path), delete=False) as f: f.write(etree.tostring(ncx, encoding='utf-8')) ncx_href = os.path.relpath(f.name, os.getcwdu()).replace(os.sep, '/') ncx_id = opf.create_manifest_item(ncx_href, NCX_MIME, append=True).get('id') for spine in opf.root.xpath('//*[local-name()="spine"]'): spine.set('toc', ncx_id) opts.epub3_nav_href = urlnormalize( os.path.relpath(nav_path).replace(os.sep, '/')) opts.epub3_nav_parsed = root if getattr(self, 'removed_cover', None): changed = False base_path = os.path.dirname(nav_path) for elem in root.xpath('//*[@href]'): href, frag = elem.get('href').partition('#')[::2] link_path = os.path.relpath( os.path.join(base_path, urlunquote(href)), base_path) abs_href = urlnormalize(link_path) if abs_href == self.removed_cover: changed = True elem.set('data-calibre-removed-titlepage', '1') if changed: with open(nav_path, 'wb') as f: f.write(serialize(root, 'application/xhtml+xml'))
def collapse_li(parent): for li in parent.iterdescendants(XHTML('li')): if len(li) == 1: li.text = None li[0].tail = None
def merge_html(container, names, master): p = container.parsed root = p(master) # Ensure master has a <head> head = root.find('h:head', namespaces=XPNSMAP) if head is None: head = root.makeelement(XHTML('head')) container.insert_into_xml(root, head, 0) seen_anchors = all_anchors(root) seen_stylesheets = set(all_stylesheets(container, master)) master_body = p(master).findall('h:body', namespaces=XPNSMAP)[-1] master_base = os.path.dirname(master) anchor_map = {n: {} for n in names if n != master} for name in names: if name == master: continue # Insert new stylesheets into master for sheet in all_stylesheets(container, name): if sheet not in seen_stylesheets: seen_stylesheets.add(sheet) link = head.makeelement(XHTML('link'), rel='stylesheet', type='text/css', href=container.name_to_href( sheet, master)) container.insert_into_xml(head, link) # Rebase links if master is in a different directory if os.path.dirname(name) != master_base: container.replace_links(name, LinkRebaser(container, name, master)) root = p(name) children = [] for body in p(name).findall('h:body', namespaces=XPNSMAP): children.append( body.text if body.text and body.text.strip() else '\n\n') children.extend(body) first_child = '' for first_child in children: if not isinstance(first_child, basestring): break if isinstance(first_child, basestring): # body contained only text, no tags first_child = body.makeelement(XHTML('p')) first_child.text, children[0] = children[0], first_child amap = anchor_map[name] remove_name_attributes(root) for elem in root.xpath('//*[@id]'): val = elem.get('id') if not val: continue if val in seen_anchors: nval = unique_anchor(seen_anchors, val) elem.set('id', nval) amap[val] = nval else: seen_anchors.add(val) if 'id' not in first_child.attrib: first_child.set('id', unique_anchor(seen_anchors, 'top')) seen_anchors.add(first_child.get('id')) amap[''] = first_child.get('id') # Fix links that point to local changed anchors for a in XPath('//h:a[starts-with(@href, "#")]')(root): q = a.get('href')[1:] if q in amap: a.set('href', '#' + amap[q]) for child in children: if isinstance(child, basestring): add_text(master_body, child) else: master_body.append(copy.deepcopy(child)) container.remove_item(name, remove_from_guide=False) # Fix all links in the container that point to merged files for fname, media_type in container.mime_map.iteritems(): repl = MergeLinkReplacer(fname, anchor_map, master, container) container.replace_links(fname, repl)
ans['font-family'] = tuple(x.value for x in ff) for p in 'weight', 'style', 'stretch': p = 'font-' + p rp = resolve_property(elem, p) if pseudo is None else resolve_property( elem, pseudo, p) ans[p] = unicode_type(rp[0].value) normalize_font_properties(ans) return ans bad_fonts = { 'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit' } exclude_chars = frozenset(ord_string('\n\r\t')) skip_tags = {XHTML(x) for x in 'script style title meta link'.split()} font_keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'} def prepare_font_rule(cssdict): cssdict['font-family'] = frozenset(cssdict['font-family'][:1]) cssdict['width'] = widths[cssdict['font-stretch']] cssdict['weight'] = int(cssdict['font-weight']) class StatsCollector: first_letter_pat = capitalize_pat = None def __init__(self, container, do_embed=False): if self.first_letter_pat is None:
def commit_nav_toc(container, toc, lang=None): from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree tocname = find_existing_nav_toc(container) if tocname is None: item = container.generate_item('nav.xhtml', id_prefix='nav') item.set('properties', 'nav') tocname = container.href_to_name(item.get('href'), base=container.opf_name) try: root = container.parsed(tocname) except KeyError: root = container.parse_xhtml( P('templates/new_nav.html', data=True).decode('utf-8')) et = '{%s}type' % EPUB_NS navs = [ n for n in root.iterdescendants(XHTML('nav')) if n.get(et) == 'toc' ] for x in navs[1:]: extract(x) if navs: nav = navs[0] tail = nav.tail attrib = dict(nav.attrib) nav.clear() nav.attrib.update(attrib) nav.tail = tail else: nav = root.makeelement(XHTML('nav')) first_child(root, XHTML('body')).append(nav) nav.set('{%s}type' % EPUB_NS, 'toc') if toc.toc_title: nav.append(nav.makeelement(XHTML('h1'))) nav[-1].text = toc.toc_title rnode = nav.makeelement(XHTML('ol')) nav.append(rnode) to_href = partial(container.name_to_href, base=tocname) spat = re.compile(r'\s+') def process_node(xml_parent, toc_parent): for child in toc_parent: li = xml_parent.makeelement(XHTML('li')) xml_parent.append(li) title = child.title or '' title = spat.sub(' ', title).strip() a = li.makeelement(XHTML('a' if child.dest else 'span')) a.text = title li.append(a) if child.dest: href = to_href(child.dest) if child.frag: href += '#' + child.frag a.set('href', href) if len(child): ol = li.makeelement(XHTML('ol')) li.append(ol) process_node(ol, child) process_node(rnode, toc) pretty_xml_tree(rnode) for li in rnode.iterdescendants(XHTML('li')): if len(li) == 1: li.text = None li[0].tail = None container.replace(tocname, root)
def mobimlize_elem(self, elem, stylizer, bstate, istates, ignore_valign=False): if not isinstance(elem.tag, basestring) \ or namespace(elem.tag) != XHTML_NS: return style = stylizer.style(elem) # <mbp:frame-set/> does not exist lalalala if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \ or style['visibility'] == 'hidden': id_ = elem.get('id', None) if id_: # Keep anchors so people can use display:none # to generate hidden TOCs tail = elem.tail elem.clear() elem.text = None elem.set('id', id_) elem.tail = tail elem.tag = XHTML('a') else: return tag = barename(elem.tag) istate = copy.copy(istates[-1]) istate.rendered = False istate.list_num = 0 if tag == 'ol' and 'start' in elem.attrib: try: istate.list_num = int(elem.attrib['start']) - 1 except: pass istates.append(istate) left = 0 display = style['display'] if display == 'table-cell': display = 'inline' elif display.startswith('table'): display = 'block' isblock = (not display.startswith('inline') and style['display'] != 'none') isblock = isblock and style['float'] == 'none' isblock = isblock and tag != 'br' if isblock: bstate.para = None istate.halign = style['text-align'] istate.indent = style['text-indent'] if style['margin-left'] == 'auto' \ and style['margin-right'] == 'auto': istate.halign = 'center' margin = asfloat(style['margin-left']) padding = asfloat(style['padding-left']) if tag != 'body': left = margin + padding istate.left += left vmargin = asfloat(style['margin-top']) bstate.vmargin = max((bstate.vmargin, vmargin)) vpadding = asfloat(style['padding-top']) if vpadding > 0: bstate.vpadding += bstate.vmargin bstate.vmargin = 0 bstate.vpadding += vpadding elif not istate.href: margin = asfloat(style['margin-left']) padding = asfloat(style['padding-left']) lspace = margin + padding if lspace > 0: spaces = int(round((lspace * 3) / style['font-size'])) elem.text = (u'\xa0' * spaces) + (elem.text or '') margin = asfloat(style['margin-right']) padding = asfloat(style['padding-right']) rspace = margin + padding if rspace > 0: spaces = int(round((rspace * 3) / style['font-size'])) if len(elem) == 0: elem.text = (elem.text or '') + (u'\xa0' * spaces) else: last = elem[-1] last.text = (last.text or '') + (u'\xa0' * spaces) if bstate.content and style['page-break-before'] in PAGE_BREAKS: bstate.pbreak = True istate.fsize = self.mobimlize_font(style['font-size']) istate.italic = True if style['font-style'] == 'italic' else False weight = style['font-weight'] istate.bold = weight in ('bold', 'bolder') or asfloat(weight) > 400 istate.preserve = (style['white-space'] in ('pre', 'pre-wrap')) istate.bgcolor = style['background-color'] istate.fgcolor = style['color'] istate.strikethrough = style.effective_text_decoration == 'line-through' istate.underline = style.effective_text_decoration == 'underline' ff = style['font-family'].lower() if style['font-family'] else '' if 'monospace' in ff or 'courier' in ff or ff.endswith(' mono'): istate.family = 'monospace' elif ('sans-serif' in ff or 'sansserif' in ff or 'verdana' in ff or 'arial' in ff or 'helvetica' in ff): istate.family = 'sans-serif' else: istate.family = 'serif' if 'id' in elem.attrib: istate.ids.add(elem.attrib['id']) if 'name' in elem.attrib: istate.ids.add(elem.attrib['name']) if tag == 'a' and 'href' in elem.attrib: istate.href = elem.attrib['href'] istate.attrib.clear() if tag == 'img' and 'src' in elem.attrib: istate.attrib['src'] = elem.attrib['src'] istate.attrib['align'] = 'baseline' cssdict = style.cssdict() valign = cssdict.get('vertical-align', None) if valign in ('top', 'bottom', 'middle'): istate.attrib['align'] = valign for prop in ('width', 'height'): if cssdict[prop] != 'auto': value = style[prop] if value == getattr(self.profile, prop): result = '100%' else: # Amazon's renderer does not support # img sizes in units other than px # See #7520 for test case try: pixs = int( round(float(value) / (72. / self.profile.dpi))) except: continue result = str(pixs) istate.attrib[prop] = result if 'width' not in istate.attrib or 'height' not in istate.attrib: href = self.current_spine_item.abshref(elem.attrib['src']) try: item = self.oeb.manifest.hrefs[urlnormalize(href)] except: self.oeb.logger.warn('Failed to find image:', href) else: try: width, height = identify_data(item.data)[:2] except: self.oeb.logger.warn('Invalid image:', href) else: if 'width' not in istate.attrib and 'height' not in \ istate.attrib: istate.attrib['width'] = str(width) istate.attrib['height'] = str(height) else: ar = float(width) / float(height) if 'width' not in istate.attrib: try: width = int(istate.attrib['height']) * ar except: pass istate.attrib['width'] = str(int(width)) else: try: height = int(istate.attrib['width']) / ar except: pass istate.attrib['height'] = str(int(height)) item.unload_data_from_memory() elif tag == 'hr' and asfloat(style['width']) > 0: prop = style['width'] / self.profile.width istate.attrib['width'] = "%d%%" % int(round(prop * 100)) elif display == 'table': tag = 'table' elif display == 'table-row': tag = 'tr' elif display == 'table-cell': tag = 'td' if tag in TABLE_TAGS and self.ignore_tables: tag = 'span' if tag == 'td' else 'div' if tag in ('table', 'td', 'tr'): col = style.backgroundColor if col: elem.set('bgcolor', col) css = style.cssdict() if 'border' in css or 'border-width' in css: elem.set('border', '1') if tag in TABLE_TAGS: for attr in ('rowspan', 'colspan', 'width', 'border', 'scope', 'bgcolor'): if attr in elem.attrib: istate.attrib[attr] = elem.attrib[attr] if tag == 'q': t = elem.text if not t: t = '' elem.text = u'\u201c' + t t = elem.tail if not t: t = '' elem.tail = u'\u201d' + t text = None if elem.text: if istate.preserve: text = elem.text else: text = COLLAPSE.sub(' ', elem.text) valign = style['vertical-align'] not_baseline = valign in ('super', 'sub', 'text-top', 'text-bottom', 'top', 'bottom') or (isinstance( valign, (float, int)) and abs(valign) != 0) issup = valign in ('super', 'text-top', 'top') or (isinstance(valign, (float, int)) and valign > 0) vtag = 'sup' if issup else 'sub' if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock: nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP) vbstate = BlockState(etree.SubElement(nroot, XHTML('body'))) vbstate.para = etree.SubElement(vbstate.body, XHTML('p')) self.mobimlize_elem(elem, stylizer, vbstate, istates, ignore_valign=True) if len(istates) > 0: istates.pop() if len(istates) == 0: istates.append(FormatState()) at_start = bstate.para is None if at_start: self.mobimlize_content('span', '', bstate, istates) parent = bstate.para if bstate.inline is None else bstate.inline if parent is not None: vtag = etree.SubElement(parent, XHTML(vtag)) vtag = etree.SubElement(vtag, XHTML('small')) # Add anchors for child in vbstate.body: if child is not vbstate.para: vtag.append(child) else: break if vbstate.para is not None: for child in vbstate.para: vtag.append(child) return if tag == 'blockquote': old_mim = self.opts.mobi_ignore_margins self.opts.mobi_ignore_margins = False if (text or tag in CONTENT_TAGS or tag in NESTABLE_TAGS or ( # We have an id but no text and no children, the id should still # be added. istate.ids and tag in ('a', 'span', 'i', 'b', 'u') and len(elem) == 0)): self.mobimlize_content(tag, text, bstate, istates) for child in elem: self.mobimlize_elem(child, stylizer, bstate, istates) tail = None if child.tail: if istate.preserve: tail = child.tail elif bstate.para is None and isspace(child.tail): tail = None else: tail = COLLAPSE.sub(' ', child.tail) if tail: self.mobimlize_content(tag, tail, bstate, istates) if tag == 'blockquote': self.opts.mobi_ignore_margins = old_mim if bstate.content and style['page-break-after'] in PAGE_BREAKS: bstate.pbreak = True if isblock: para = bstate.para if para is not None and para.text == u'\xa0' and len(para) < 1: if style.height > 2: para.getparent().replace(para, etree.Element(XHTML('br'))) else: # This is too small to be rendered effectively, drop it para.getparent().remove(para) bstate.para = None bstate.istate = None vmargin = asfloat(style['margin-bottom']) bstate.vmargin = max((bstate.vmargin, vmargin)) vpadding = asfloat(style['padding-bottom']) if vpadding > 0: bstate.vpadding += bstate.vmargin bstate.vmargin = 0 bstate.vpadding += vpadding if bstate.nested and bstate.nested[-1].tag == elem.tag: bstate.nested.pop() istates.pop()
def render_jacket(mi, output_profile, alt_title=_('Unknown'), alt_tags=[], alt_comments='', alt_publisher='', rescale_fonts=False, alt_authors=None): css = P('jacket/stylesheet.css', data=True).decode('utf-8') template = P('jacket/template.xhtml', data=True).decode('utf-8') template = re.sub(r'<!--.*?-->', '', template, flags=re.DOTALL) css = re.sub(r'/\*.*?\*/', '', css, flags=re.DOTALL) try: title_str = alt_title if mi.is_null('title') else mi.title except: title_str = _('Unknown') title_str = escape(title_str) title = '<span class="title">%s</span>' % title_str series = Series(mi.series, mi.series_index) try: publisher = mi.publisher if not mi.is_null( 'publisher') else alt_publisher except: publisher = '' publisher = escape(publisher) try: if is_date_undefined(mi.pubdate): pubdate = '' else: dt = as_local_time(mi.pubdate) pubdate = strftime('%Y', dt.timetuple()) except: pubdate = '' rating = get_rating(mi.rating, output_profile.ratings_char, output_profile.empty_ratings_char) tags = Tags((mi.tags if mi.tags else alt_tags), output_profile) comments = mi.comments if mi.comments else alt_comments comments = comments.strip() if comments: comments = comments_to_html(comments) orig = mi.authors if mi.is_null('authors'): mi.authors = list(alt_authors or (_('Unknown'), )) try: author = mi.format_authors() except: author = '' mi.authors = orig author = escape(author) has_data = {} def generate_html(comments): args = dict( xmlns=XHTML_NS, title_str=title_str, css=css, title=title, author=author, publisher=publisher, pubdate_label=_('Published'), pubdate=pubdate, series_label=ngettext('Series', 'Series', 1), series=series, rating_label=_('Rating'), rating=rating, tags_label=_('Tags'), tags=tags, comments=comments, footer='', searchable_tags=' '.join( escape(t) + 'ttt' for t in tags.tags_list), ) for key in mi.custom_field_keys(): m = mi.get_user_metadata(key, False) or {} try: display_name, val = mi.format_field_extended(key)[:2] dkey = key.replace('#', '_') dt = m.get('datatype') if dt == 'series': args[dkey] = Series(mi.get(key), mi.get(key + '_index')) elif dt == 'rating': args[dkey] = rating_to_stars( mi.get(key), m.get('display', {}).get('allow_half_stars', False)) elif dt == 'comments': val = val or '' display = m.get('display', {}) ctype = display.get('interpret_as') or 'html' if ctype == 'long-text': val = '<pre style="white-space:pre-wrap">%s</pre>' % escape( val) elif ctype == 'short-text': val = '<span>%s</span>' % escape(val) elif ctype == 'markdown': val = markdown(val) else: val = comments_to_html(val) args[dkey] = val else: args[dkey] = escape(val) args[dkey + '_label'] = escape(display_name) except Exception: # if the val (custom column contents) is None, don't add to args pass if False: print("Custom column values available in jacket template:") for key in args.keys(): if key.startswith('_') and not key.endswith('_label'): print(" %s: %s" % ('#' + key[1:], args[key])) # Used in the comment describing use of custom columns in templates # Don't change this unless you also change it in template.xhtml args['_genre_label'] = args.get('_genre_label', '{_genre_label}') args['_genre'] = args.get('_genre', '{_genre}') formatter = SafeFormatter() generated_html = formatter.format(template, **args) has_data['series'] = bool(series) has_data['tags'] = bool(tags) has_data['rating'] = bool(rating) has_data['pubdate'] = bool(pubdate) return strip_encoding_declarations(generated_html) from calibre.ebooks.oeb.polish.parsing import parse raw = generate_html(comments) root = parse(raw, line_numbers=False, force_html5_parse=True) if rescale_fonts: # We ensure that the conversion pipeline will set the font sizes for # text in the jacket to the same size as the font sizes for the rest of # the text in the book. That means that as long as the jacket uses # relative font sizes (em or %), the post conversion font size will be # the same as for text in the main book. So text with size x em will # be rescaled to the same value in both the jacket and the main content. # # We cannot use data-calibre-rescale 100 on the body tag as that will just # give the body tag a font size of 1em, which is useless. for body in root.xpath('//*[local-name()="body"]'): fw = body.makeelement(XHTML('div')) fw.set('data-calibre-rescale', '100') for child in body: fw.append(child) body.append(fw) postprocess_jacket(root, output_profile, has_data) from calibre.ebooks.oeb.polish.pretty import pretty_html_tree pretty_html_tree(None, root) return root
def commit_nav_toc(container, toc, lang=None, landmarks=None, previous_nav=None): from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree tocname = find_existing_nav_toc(container) if previous_nav is not None: nav_name = container.href_to_name(previous_nav[0]) if nav_name and container.exists(nav_name): tocname = nav_name container.apply_unique_properties(tocname, 'nav') if tocname is None: item = container.generate_item('nav.xhtml', id_prefix='nav') item.set('properties', 'nav') tocname = container.href_to_name(item.get('href'), base=container.opf_name) if previous_nav is not None: root = previous_nav[1] else: root = container.parse_xhtml(P('templates/new_nav.html', data=True).decode('utf-8')) container.replace(tocname, root) else: root = container.parsed(tocname) if lang: lang = lang_as_iso639_1(lang) or lang root.set('lang', lang) root.set('{%s}lang' % XML_NS, lang) nav = ensure_single_nav_of_type(root, 'toc') if toc.toc_title: nav.append(nav.makeelement(XHTML('h1'))) nav[-1].text = toc.toc_title rnode = nav.makeelement(XHTML('ol')) nav.append(rnode) to_href = partial(container.name_to_href, base=tocname) spat = re.compile(r'\s+') def process_node(xml_parent, toc_parent): for child in toc_parent: li = xml_parent.makeelement(XHTML('li')) xml_parent.append(li) title = child.title or '' title = spat.sub(' ', title).strip() a = li.makeelement(XHTML('a' if child.dest else 'span')) a.text = title li.append(a) if child.dest: href = to_href(child.dest) if child.frag: href += '#'+child.frag a.set('href', href) if len(child): ol = li.makeelement(XHTML('ol')) li.append(ol) process_node(ol, child) process_node(rnode, toc) pretty_xml_tree(nav) def collapse_li(parent): for li in parent.iterdescendants(XHTML('li')): if len(li) == 1: li.text = None li[0].tail = None collapse_li(nav) nav.tail = '\n' def create_li(ol, entry): li = ol.makeelement(XHTML('li')) ol.append(li) a = li.makeelement(XHTML('a')) li.append(a) href = container.name_to_href(entry['dest'], tocname) if entry['frag']: href += '#' + entry['frag'] a.set('href', href) return a if landmarks is not None: nav = ensure_single_nav_of_type(root, 'landmarks') nav.set('hidden', '') ol = nav.makeelement(XHTML('ol')) nav.append(ol) for entry in landmarks: if entry['type'] and container.has_name(entry['dest']) and container.mime_map[entry['dest']] in OEB_DOCS: a = create_li(ol, entry) a.set('{%s}type' % EPUB_NS, entry['type']) a.text = entry['title'] or None pretty_xml_tree(nav) collapse_li(nav) if toc.page_list: nav = ensure_single_nav_of_type(root, 'page-list') nav.set('hidden', '') ol = nav.makeelement(XHTML('ol')) nav.append(ol) for entry in toc.page_list: if container.has_name(entry['dest']) and container.mime_map[entry['dest']] in OEB_DOCS: a = create_li(ol, entry) a.text = unicode_type(entry['pagenum']) pretty_xml_tree(nav) collapse_li(nav) container.replace(tocname, root)
def __init__(self, tree, path, oeb, opts, profile=None, extra_css='', user_css=''): self.oeb, self.opts = oeb, opts self.profile = profile if self.profile is None: self.profile = opts.output_profile self.logger = oeb.logger item = oeb.manifest.hrefs[path] basename = os.path.basename(path) cssname = os.path.splitext(basename)[0] + '.css' stylesheets = [html_css_stylesheet()] head = xpath(tree, '/h:html/h:head') if head: head = head[0] else: head = [] # Add cssutils parsing profiles from output_profile for profile in self.opts.output_profile.extra_css_modules: cssprofiles.addProfile(profile['name'], profile['props'], profile['macros']) parser = CSSParser(fetcher=self._fetch_css_file, log=logging.getLogger('calibre.css')) self.font_face_rules = [] for elem in head: if (elem.tag == XHTML('style') and elem.get('type', CSS_MIME) in OEB_STYLES): text = elem.text if elem.text else u'' for x in elem: t = getattr(x, 'text', None) if t: text += u'\n\n' + force_unicode(t, u'utf-8') t = getattr(x, 'tail', None) if t: text += u'\n\n' + force_unicode(t, u'utf-8') if text: text = oeb.css_preprocessor(text, add_namespace=False) # We handle @import rules separately parser.setFetcher(lambda x: ('utf-8', b'')) stylesheet = parser.parseString(text, href=cssname, validate=False) parser.setFetcher(self._fetch_css_file) #stylesheet.namespaces['h'] = XHTML_NS for rule in stylesheet.cssRules: if rule.type == rule.IMPORT_RULE: ihref = item.abshref(rule.href) if rule.media.mediaText == 'amzn-mobi': continue hrefs = self.oeb.manifest.hrefs if ihref not in hrefs: self.logger.warn( 'Ignoring missing stylesheet in @import rule:', rule.href) continue sitem = hrefs[ihref] if sitem.media_type not in OEB_STYLES: self.logger.warn( 'CSS @import of non-CSS file %r' % rule.href) continue stylesheets.append(sitem.data) # Make links to resources absolute, since these rules will # be folded into a stylesheet at the root replaceUrls(stylesheet, item.abshref, ignoreImportRules=True) stylesheets.append(stylesheet) elif elem.tag == XHTML('link') and elem.get('href') \ and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \ and elem.get('type', CSS_MIME).lower() in OEB_STYLES: href = urlnormalize(elem.attrib['href']) path = item.abshref(href) sitem = oeb.manifest.hrefs.get(path, None) if sitem is None: self.logger.warn( 'Stylesheet %r referenced by file %r not in manifest' % (path, item.href)) continue if not hasattr(sitem.data, 'cssRules'): self.logger.warn( 'Stylesheet %r referenced by file %r is not CSS' % (path, item.href)) continue stylesheets.append(sitem.data) csses = {'extra_css': extra_css, 'user_css': user_css} for w, x in csses.items(): if x: try: text = XHTML_CSS_NAMESPACE + x stylesheet = parser.parseString(text, href=cssname, validate=False) #stylesheet.namespaces['h'] = XHTML_NS stylesheets.append(stylesheet) except: self.logger.exception('Failed to parse %s, ignoring.' % w) self.logger.debug('Bad css: ') self.logger.debug(x) rules = [] index = 0 self.stylesheets = set() self.page_rule = {} for stylesheet in stylesheets: href = stylesheet.href self.stylesheets.add(href) for rule in stylesheet.cssRules: if rule.type == rule.MEDIA_RULE: media = { rule.media.item(i) for i in xrange(rule.media.length) } if not media.intersection({'all', 'screen', 'amzn-kf8'}): continue for subrule in rule.cssRules: rules.extend(self.flatten_rule(subrule, href, index)) index += 1 else: rules.extend(self.flatten_rule(rule, href, index)) index = index + 1 rules.sort() self.rules = rules self._styles = {} pseudo_pat = re.compile( ur':(first-letter|first-line|link|hover|visited|active|focus|before|after)', re.I) for _, _, cssdict, text, _ in rules: fl = pseudo_pat.search(text) if fl is not None: text = text.replace(fl.group(), '') selector = get_css_selector(text, self.oeb.log) matches = selector(tree, self.logger) if fl is not None: fl = fl.group(1) if fl == 'first-letter' and getattr(self.oeb, 'plumber_output_format', '').lower() == u'mobi': # Fake first-letter from lxml.builder import ElementMaker E = ElementMaker(namespace=XHTML_NS) for elem in matches: for x in elem.iter(): if x.text: punctuation_chars = [] text = unicode(x.text) while text: category = unicodedata.category(text[0]) if category[0] not in {'P', 'Z'}: break punctuation_chars.append(text[0]) text = text[1:] special_text = u''.join(punctuation_chars) + \ (text[0] if text else u'') span = E.span(special_text) span.tail = text[1:] x.text = None x.insert(0, span) self.style(span)._update_cssdict(cssdict) break else: # Element pseudo-class for elem in matches: self.style(elem)._update_pseudo_class(fl, cssdict) else: for elem in matches: self.style(elem)._update_cssdict(cssdict) for elem in xpath(tree, '//h:*[@style]'): self.style(elem)._apply_style_attr(url_replacer=item.abshref) num_pat = re.compile(r'\d+$') for elem in xpath(tree, '//h:img[@width or @height]'): style = self.style(elem) # Check if either height or width is not default is_styled = style._style.get('width', 'auto') != 'auto' or \ style._style.get('height', 'auto') != 'auto' if not is_styled: # Update img style dimension using width and height upd = {} for prop in ('width', 'height'): val = elem.get(prop, '').strip() try: del elem.attrib[prop] except: pass if val: if num_pat.match(val) is not None: val += 'px' upd[prop] = val if upd: style._update_cssdict(upd)