def primary_find(pat, src):
    """Find ``pat`` in ``src`` while ignoring case and accents on letters.

    When the module flag ``_icu_not_ok`` is set, both arguments are passed
    through ``ascii_text`` and handed to ``py_find``; otherwise the lookup is
    delegated to ``primary_icu_find``. The result of whichever helper ran is
    returned unchanged.
    """
    if not _icu_not_ok:
        return primary_icu_find(pat, src)
    # Fallback path: compare the ASCII-reduced forms of both strings.
    from calibre.utils.filenames import ascii_text
    needle = ascii_text(pat)
    haystack = ascii_text(src)
    return py_find(needle, haystack)
def primary_startswith(a, b):
    """Return whether ``a`` starts with ``b`` using primary-strength rules.

    With ``_icu_not_ok`` set, the check is done on the lower-cased
    ``ascii_text`` forms of both strings. Otherwise ``icu_startswith`` is
    called with the cached ``_primary_collator``; if that raises
    ``AttributeError`` the call is repeated with ``primary_collator()``.
    """
    if not _icu_not_ok:
        try:
            return icu_startswith(_primary_collator, a, b)
        except AttributeError:
            # Retry with the collator returned by primary_collator().
            return icu_startswith(primary_collator(), a, b)
    from calibre.utils.filenames import ascii_text
    text = ascii_text(a).lower()
    prefix = ascii_text(b).lower()
    return text.startswith(prefix)
def primary_find(pat, src):
    """Find ``pat`` in ``src`` while ignoring case and accents on letters.

    With ``_icu_not_ok`` set, the ``ascii_text`` forms of both strings are
    searched with ``py_find``. Otherwise ``icu_find`` is called with the
    cached ``_primary_collator``, retrying with ``primary_collator()`` if the
    first attempt raises ``AttributeError``.
    """
    if not _icu_not_ok:
        try:
            return icu_find(_primary_collator, pat, src)
        except AttributeError:
            # Retry with the collator returned by primary_collator().
            return icu_find(primary_collator(), pat, src)
    from calibre.utils.filenames import ascii_text
    needle = ascii_text(pat)
    haystack = ascii_text(src)
    return py_find(needle, haystack)
def primary_strcmp(a, b):
    """Compare ``a`` and ``b`` while ignoring case and accents on letters.

    With ``_icu_not_ok`` set, ``py_strcmp`` is applied to the ``ascii_text``
    forms of the arguments. Otherwise the ``strcmp`` method of the cached
    ``_primary_collator`` is used, retrying with ``primary_collator()`` if
    the first attempt raises ``AttributeError``.
    """
    if not _icu_not_ok:
        try:
            return _primary_collator.strcmp(a, b)
        except AttributeError:
            # Retry with the collator returned by primary_collator().
            return primary_collator().strcmp(a, b)
    from calibre.utils.filenames import ascii_text
    left = ascii_text(a)
    right = ascii_text(b)
    return py_strcmp(left, right)
def write_unicode_text(self, text, ignore_errors=False):
    """Write a unicode string to the Windows console through the Win32 API.

    Does nothing unless ``self.is_console`` is true. The text is written
    with ``self.write_console`` in chunks, starting with the whole string.

    :param text: the unicode text to write
    :param ignore_errors: when true, a failed write that is not handled by
        one of the special cases below is skipped instead of raising
    :raises: ``ctypes.WinError`` for an unhandled write failure when
        ``ignore_errors`` is false
    """
    if not self.is_console:
        return
    from ctypes import wintypes, byref, c_wchar_p
    written = wintypes.DWORD(0)
    chunk_size = len(text)
    remaining = text
    while remaining:
        piece = remaining[:chunk_size]
        remaining = remaining[chunk_size:]
        wide = c_wchar_p(piece)
        if self.write_console(self.file_handle, wide, self.wcslen(wide), byref(written), None):
            continue
        # Older versions of windows can fail to write large strings
        # to console with WriteConsoleW (seen it happen on Win XP)
        import ctypes, winerror
        err = ctypes.get_last_error()
        if err == winerror.ERROR_NOT_ENOUGH_MEMORY and chunk_size >= 128:
            # Retry with a smaller chunk size (give up if chunk < 128)
            chunk_size = chunk_size // 2
            remaining = piece + remaining
        elif err == winerror.ERROR_GEN_FAILURE:
            # On newer windows, this happens when trying to write
            # non-ascii chars to the console and the console is set
            # to use raster fonts (the default). In this case
            # rather than failing, write an informative error
            # message and the asciized version of the text.
            print ('Non-ASCII text detected. You must set your Console\'s font to'
                   ' Lucida Console or Consolas or some other TrueType font to see this text',
                   file=self.stream, end=' -- ')
            from calibre.utils.filenames import ascii_text
            print (ascii_text(piece + remaining), file=self.stream, end='')
        elif not ignore_errors:
            raise ctypes.WinError(err)
def create_service(desc, type, port, properties, add_hostname, use_ip_address=None):
    """Build a ``ServiceInfo`` describing a service to advertise.

    :param desc: human readable service description
    :param type: service type; ``'.local.'`` is appended to it
    :param port: port number (coerced with ``int``)
    :param properties: passed through to ``ServiceInfo``
    :param add_hostname: when true, a "(on <host> port <port>)" suffix is
        appended to ``desc`` on a best-effort basis
    :param use_ip_address: address to advertise; when falsy the result of
        ``get_external_ip()`` is used instead
    :return: a ``calibre.utils.Zeroconf.ServiceInfo`` instance
    """
    port = int(port)
    try:
        raw_host = force_unicode(socket.gethostname())
        hostname = ascii_text(raw_host).partition('.')[0]
    except:
        hostname = 'Unknown'

    if add_hostname:
        # Best effort: fall back to a shorter suffix, then to none at all.
        try:
            desc += ' (on %s port %d)'%(hostname, port)
        except:
            try:
                desc += ' (on %s)'%hostname
            except:
                pass

    local_ip = use_ip_address if use_ip_address else get_external_ip()
    full_type = type+'.local.'
    from calibre.utils.Zeroconf import ServiceInfo
    return ServiceInfo(
        full_type,
        desc+'.'+full_type,
        address=socket.inet_aton(local_ip),
        port=port,
        properties=properties,
        server=hostname+'.local.',
    )
def create_service(desc, type, port, properties, add_hostname, use_ip_address=None):
    """Build a ``ServiceInfo`` describing a service to advertise.

    ``port`` is coerced with ``int``. The short local host name (text before
    the first ``'.'``) is used for the server name and, when
    ``add_hostname`` is true, appended to ``desc`` on a best-effort basis.
    The advertised address is ``use_ip_address`` when truthy, otherwise the
    result of ``get_external_ip()``.

    :return: a ``calibre.utils.Zeroconf.ServiceInfo`` instance
    """
    port = int(port)
    try:
        host_text = ascii_text(force_unicode(socket.gethostname()))
        hostname = host_text.partition('.')[0]
    except:
        hostname = 'Unknown'

    if add_hostname:
        # Best effort: fall back to a shorter suffix, then to none at all.
        try:
            desc += ' (on %s port %d)' % (hostname, port)
        except:
            try:
                desc += ' (on %s)' % hostname
            except:
                pass

    local_ip = use_ip_address or get_external_ip()
    qualified_type = type + '.local.'
    from calibre.utils.Zeroconf import ServiceInfo
    service_name = desc + '.' + qualified_type
    packed_address = socket.inet_aton(local_ip)
    return ServiceInfo(qualified_type, service_name,
                       address=packed_address,
                       port=port,
                       properties=properties,
                       server=hostname + '.local.')
def generate_anchor(name, existing):
    """Return an anchor id derived from ``name`` that is not in ``existing``.

    The id is ``'id_'`` followed by the ``ascii_text`` form of ``name`` with
    everything except ASCII letters, digits and underscores removed and
    leading underscores stripped. If that id is already in ``existing``,
    ``_1``, ``_2``, ... are appended until an unused id is found.
    ``existing`` itself is not modified.
    """
    cleaned = re.sub(r'[^0-9a-zA-Z_]', '', ascii_text(name))
    base = 'id_' + cleaned.lstrip('_')
    candidate = base
    suffix = 0
    while candidate in existing:
        suffix += 1
        candidate = '%s_%d' % (base, suffix)
    return candidate
def first_char(item):
    """Return the first alphanumeric character of an item's sort text.

    The text is ``item.sort`` when that attribute exists, otherwise
    ``item.name`` (``item.name`` is always evaluated). An empty value is
    treated as ``'A'``, and ``'A'`` is also returned when the
    ``ascii_text`` form contains no alphanumeric character.
    """
    val = getattr(item, 'sort', item.name) or 'A'
    return next((ch for ch in ascii_text(val) if ch.isalnum()), 'A')
def primary_sort_key(val):
    """Return a sort key for ``val`` that ignores case and diacritics.

    With ``_icu_not_ok`` set, the key is the lower-cased ``ascii_text`` form
    of ``val``. Otherwise it comes from the ``sort_key`` method of the cached
    ``_primary_collator``, retrying with ``primary_collator()`` if the first
    attempt raises ``AttributeError``.
    """
    if not _icu_not_ok:
        try:
            return _primary_collator.sort_key(val)
        except AttributeError:
            # Retry with the collator returned by primary_collator().
            return primary_collator().sort_key(val)
    from calibre.utils.filenames import ascii_text
    plain = ascii_text(val)
    return plain.lower()
def tpl_replace(objtplname):
    """Regex substitution callback that expands one ``{field}`` placeholder.

    The braces are stripped from the matched text to get the field name.
    Names not in ``TEMPLATE_ALLOWED_FIELDS`` expand to an empty string.
    Otherwise the value is read from ``entry`` and returned through
    ``ascii_text``.

    :param objtplname: the regex match object for the placeholder
    """
    field = re.sub(u'[\\{\\}]', u'', objtplname.group())
    if field not in TEMPLATE_ALLOWED_FIELDS:
        return u''

    raw = entry[field]
    if field in ('pubdate', 'timestamp'):
        # Keep only the part of the ISO string before 'T'.
        text = isoformat(raw).partition('T')[0]
    elif field in ('tags', 'authors'):
        # Only the first element is used.
        text = raw[0]
    elif field in ('id', 'series_index'):
        text = str(raw)
    else:
        text = raw
    return ascii_text(text)
def tpl_replace(objtplname):
    """Regex substitution callback that expands one ``{field}`` placeholder.

    The braces are stripped from the matched text to get the field name.
    Names not in ``TEMPLATE_ALLOWED_FIELDS`` expand to an empty string.
    Otherwise the value is read from ``entry`` and returned through
    ``ascii_text``.

    :param objtplname: the regex match object for the placeholder
    """
    field = re.sub(r'[\{\}]', '', objtplname.group())
    if field not in TEMPLATE_ALLOWED_FIELDS:
        return ''

    raw = entry[field]
    if field in ('pubdate', 'timestamp'):
        # Keep only the part of the ISO string before 'T'.
        text = isoformat(raw).partition('T')[0]
    elif field in ('tags', 'authors'):
        # Only the first element is used.
        text = raw[0]
    elif field in ('id', 'series_index'):
        text = unicode_type(raw)
    else:
        text = raw
    return ascii_text(text)
def create_service(desc, service_type, port, properties, add_hostname, use_ip_address=None):
    """Build a ``ServiceInfo`` describing a service to advertise.

    :param desc: human readable service description
    :param service_type: service type; ``'.local.'`` is appended to it
    :param port: port number (coerced with ``int``)
    :param properties: passed to ``ServiceInfo``
    :param add_hostname: when true, a "(on <host> port <port>)" suffix is
        appended to ``desc`` on a best-effort basis
    :param use_ip_address: address to advertise; when falsy the result of
        ``get_external_ip()`` is used instead
    :raises ValueError: if no local IP address could be determined
    :return: a ``ServiceInfo`` from ``zeroconf`` when ``ispy3`` is true,
        otherwise from ``calibre.utils.Zeroconf``
    """
    port = int(port)
    try:
        raw_host = force_unicode(socket.gethostname())
        hostname = ascii_text(raw_host).partition('.')[0]
    except:
        hostname = 'Unknown'

    if add_hostname:
        # Best effort: fall back to a shorter suffix, then to none at all.
        try:
            desc += ' (on %s port %d)'%(hostname, port)
        except:
            try:
                desc += ' (on %s)'%hostname
            except:
                pass

    local_ip = use_ip_address or get_external_ip()
    if not local_ip:
        raise ValueError('Failed to determine local IP address to advertise via BonJour')

    service_type = service_type+'.local.'
    service_name = desc + '.' + service_type
    server_name = hostname+'.local.'

    if not ispy3:
        from calibre.utils.Zeroconf import ServiceInfo

        def enc(x):
            # Unicode text is encoded to ASCII bytes on this branch.
            return x.encode('ascii') if isinstance(x, unicode_type) else x

        service_type = enc(service_type)
        service_name = enc(service_name)
        server_name = enc(server_name)
        if properties:
            properties = {enc(k): enc(v) for k, v in iteritems(properties)}
    else:
        from zeroconf import ServiceInfo

    packed_address = socket.inet_aton(local_ip)
    return ServiceInfo(
        service_type, service_name,
        address=packed_address,
        port=port,
        properties=properties,
        server=server_name)
def create_service(desc, service_type, port, properties, add_hostname, use_ip_address=None):
    """Build a ``zeroconf.ServiceInfo`` describing a service to advertise.

    ``port`` is coerced with ``int``. The short local host name (text before
    the first ``'.'``) is used for the server name and, when
    ``add_hostname`` is true, appended to ``desc`` on a best-effort basis.
    The advertised address is ``use_ip_address`` when truthy, otherwise the
    result of ``get_external_ip()``.

    :raises ValueError: if no local IP address could be determined
    """
    port = int(port)
    try:
        host_text = ascii_text(force_unicode(socket.gethostname()))
        hostname = host_text.partition('.')[0]
    except:
        hostname = 'Unknown'

    if add_hostname:
        # Best effort: fall back to a shorter suffix, then to none at all.
        try:
            desc += ' (on %s port %d)' % (hostname, port)
        except:
            try:
                desc += ' (on %s)' % hostname
            except:
                pass

    local_ip = use_ip_address or get_external_ip()
    if not local_ip:
        raise ValueError(
            'Failed to determine local IP address to advertise via BonJour')

    service_type = service_type + '.local.'
    service_name = desc + '.' + service_type
    server_name = hostname + '.local.'

    from zeroconf import ServiceInfo
    packed_addresses = [socket.inet_aton(local_ip)]
    return ServiceInfo(service_type, service_name,
                       addresses=packed_addresses,
                       port=port,
                       properties=properties,
                       server=server_name)
def safe_localhost():
    """Return a host identifier suitable for the SMTP EHLO/HELO verb.

    RFC 2821 says the fqdn should be used, and if that can't be calculated,
    a domain literal (essentially an encoded IP address like [A.B.C.D])
    should be used instead.
    """
    fqdn = socket.getfqdn()
    if '.' not in fqdn:
        # We can't find an fqdn hostname, so use a domain literal
        addr = '127.0.0.1'
        try:
            addr = socket.gethostbyname(socket.gethostname())
        except socket.gaierror:
            pass
        return '[%s]' % addr

    # Some mail servers have problems with non-ascii local hostnames, see
    # https://bugs.launchpad.net/bugs/1256549
    try:
        return ascii_text(fqdn)
    except:
        return 'localhost.localdomain'
def flatten_node(self, node, stylizer, names, styles, pseudo_styles, psize, item_id):
    """Flatten the computed style of ``node`` into a class and recurse.

    Presentational attributes (``align``, and for ``<font>`` elements
    ``size``/``face``, plus ``color``/``bgcolor``) are converted into
    entries of the node's CSS dictionary and removed from the node. The
    font size is optionally rescaled relative to ``psize``, a minimum
    line height is applied, properties in ``self.filter_css`` are dropped,
    and the resulting declarations are mapped to a class name that is
    written to the node's ``class`` attribute. Any inline ``style``
    attribute is removed. Children are processed recursively.

    :param node: element to process; skipped unless its tag is a string in
        the XHTML namespace
    :param stylizer: provides ``style(node)`` for computed styles
    :param names: mapping of class base name to a counter used for numeric
        suffixes (presumably defaults missing keys to 0 -- TODO confirm)
    :param styles: mapping of CSS text to the class name generated for it
    :param pseudo_styles: mapping of pseudo selector to a mapping of CSS
        text to class name
    :param psize: font size of the parent, used as the base for ``em`` units
    :param item_id: id of the item being processed, compared against
        ``'calibre_jacket'``
    """
    if not isinstance(node.tag, basestring) or namespace(node.tag) != XHTML_NS:
        return
    tag = barename(node.tag)
    style = stylizer.style(node)
    cssdict = style.cssdict()
    try:
        font_size = style["font-size"]
    except:
        # Fall back to the configured base size when the lookup fails.
        font_size = self.sbase if self.sbase is not None else self.context.source.fbase
    # align attribute: text-align for everything except <img>, where it
    # maps to vertical-align or float instead.
    if "align" in node.attrib:
        if tag != "img":
            cssdict["text-align"] = node.attrib["align"]
        else:
            val = node.attrib["align"]
            if val in ("middle", "bottom", "top"):
                cssdict["vertical-align"] = val
            elif val in ("left", "right"):
                cssdict["float"] = val
        del node.attrib["align"]
    # <font> is replaced by <div> when it contains any of the listed
    # descendants, otherwise by <span>.
    if node.tag == XHTML("font"):
        tags = [
            "descendant::h:%s" % x
            for x in ("p", "div", "table", "h1", "h2", "h3", "h4", "h5", "h6", "ol", "ul", "dl", "blockquote")
        ]
        tag = "div" if XPath("|".join(tags))(node) else "span"
        node.tag = XHTML(tag)
        if "size" in node.attrib:

            def force_int(raw):
                return int(re.search(r"([0-9+-]+)", raw).group(1))

            size = node.attrib["size"].strip()
            if size:
                fnums = self.context.source.fnums
                if size[0] in ("+", "-"):
                    # Oh, the warcrimes
                    # Relative size: offset from 3, clamped to the range 1..7.
                    try:
                        esize = 3 + force_int(size)
                    except:
                        esize = 3
                    if esize < 1:
                        esize = 1
                    if esize > 7:
                        esize = 7
                    font_size = fnums[esize]
                else:
                    try:
                        font_size = fnums[force_int(size)]
                    except:
                        font_size = fnums[3]
                cssdict["font-size"] = "%.1fpt" % font_size
            del node.attrib["size"]
        if "face" in node.attrib:
            cssdict["font-family"] = node.attrib["face"]
            del node.attrib["face"]
    # color/bgcolor attributes: values that fail to parse are dropped.
    if "color" in node.attrib:
        try:
            cssdict["color"] = Property("color", node.attrib["color"]).value
        except (ValueError, SyntaxErr):
            pass
        del node.attrib["color"]
    if "bgcolor" in node.attrib:
        try:
            cssdict["background-color"] = Property("background-color", node.attrib["bgcolor"]).value
        except (ValueError, SyntaxErr):
            pass
        del node.attrib["bgcolor"]
    if cssdict.get("font-weight", "").lower() == "medium":
        cssdict["font-weight"] = "normal"  # ADE chokes on font-weight medium

    fsize = font_size
    # A left-floated, childless node with an explicit font-size and a single
    # character of text is treated as a drop cap and is not rescaled.
    is_drop_cap = (
        cssdict.get("float", None) == "left"
        and "font-size" in cssdict
        and len(node) == 0
        and node.text
        and len(node.text) == 1
    )
    is_drop_cap = is_drop_cap or (
        # The docx input plugin generates drop caps that look like this
        len(node) == 1
        and not node.text
        and len(node[0]) == 0
        and node[0].text
        and not node[0].tail
        and len(node[0].text) == 1
        and "line-height" in cssdict
        and "font-size" in cssdict
    )
    if not self.context.disable_font_rescaling and not is_drop_cap:
        _sbase = self.sbase if self.sbase is not None else self.context.source.fbase
        dyn_rescale = dynamic_rescale_factor(node)
        if dyn_rescale is not None:
            fsize = self.fmap[_sbase]
            fsize *= dyn_rescale
            cssdict["font-size"] = "%0.5fem" % (fsize / psize)
            # From here on (including the recursion) this node's size is
            # the parent size.
            psize = fsize
        elif "font-size" in cssdict or tag == "body":
            fsize = self.fmap[font_size]
            try:
                cssdict["font-size"] = "%0.5fem" % (fsize / psize)
            except ZeroDivisionError:
                # No usable parent size: emit an absolute size instead.
                cssdict["font-size"] = "%.1fpt" % fsize
            psize = fsize

    try:
        minlh = self.context.minimum_line_height / 100.0
        if not is_drop_cap and style["line-height"] < minlh * fsize:
            cssdict["line-height"] = str(minlh)
    except:
        self.oeb.logger.exception("Failed to set minimum line-height")

    if cssdict:
        for x in self.filter_css:
            cssdict.pop(x, None)

    if cssdict:
        if self.lineh and self.fbase and tag != "body":
            self.clean_edges(cssdict, style, psize)
        if "display" in cssdict and cssdict["display"] == "in-line":
            cssdict["display"] = "inline"
        if self.unfloat and "float" in cssdict and cssdict.get("display", "none") != "none":
            del cssdict["display"]
        if self.untable and "display" in cssdict and cssdict["display"].startswith("table"):
            display = cssdict["display"]
            if display == "table-cell":
                cssdict["display"] = "inline"
            else:
                cssdict["display"] = "block"
        if "vertical-align" in cssdict and cssdict["vertical-align"] == "sup":
            cssdict["vertical-align"] = "super"
    if self.lineh and "line-height" not in cssdict:
        lineh = self.lineh / psize
        cssdict["line-height"] = "%0.5fem" % lineh

    if (self.context.remove_paragraph_spacing or self.context.insert_blank_line) and tag in ("p", "div"):
        if item_id != "calibre_jacket" or self.context.output_profile.name == "Kindle":
            for prop in ("margin", "padding", "border"):
                for edge in ("top", "bottom"):
                    cssdict["%s-%s" % (prop, edge)] = "0pt"
        if self.context.insert_blank_line:
            cssdict["margin-top"] = cssdict["margin-bottom"] = "%fem" % self.context.insert_blank_line_size
        indent_size = self.context.remove_paragraph_spacing_indent_size
        # A negative indent size means existing indents are left alone.
        keep_indents = indent_size < 0.0
        if (
            self.context.remove_paragraph_spacing
            and not keep_indents
            and cssdict.get("text-align", None) not in ("center", "right")
        ):
            cssdict["text-indent"] = "%1.1fem" % indent_size

    pseudo_classes = style.pseudo_classes(self.filter_css)
    if cssdict or pseudo_classes:
        keep_classes = set()

        if cssdict:
            # Identical declaration text reuses the class generated earlier.
            items = sorted(cssdict.items())
            css = u";\n".join(u"%s: %s" % (key, val) for key, val in items)
            classes = node.get("class", "").strip() or "calibre"
            klass = ascii_text(STRIPNUM.sub("", classes.split()[0].replace("_", "")))
            if css in styles:
                match = styles[css]
            else:
                match = klass + str(names[klass] or "")
                styles[css] = match
                names[klass] += 1
            node.attrib["class"] = match
            keep_classes.add(match)

        # NOTE: the loop variable below rebinds ``cssdict``.
        for psel, cssdict in pseudo_classes.iteritems():
            items = sorted(cssdict.iteritems())
            css = u";\n".join(u"%s: %s" % (key, val) for key, val in items)
            pstyles = pseudo_styles[psel]
            if css in pstyles:
                match = pstyles[css]
            else:
                # We have to use a different class for each psel as
                # otherwise you can have incorrect styles for a situation
                # like: a:hover { color: red } a:link { color: blue } a.x:hover { color: green }
                # If the pcalibre class for a:hover and a:link is the same,
                # then the class attribute for a.x tags will contain both
                # that class and the class for a.x:hover, which is wrong.
                klass = "pcalibre"
                match = klass + str(names[klass] or "")
                pstyles[css] = match
                names[klass] += 1
            keep_classes.add(match)
        # keep_classes is a set, so the order of the joined names is not fixed.
        node.attrib["class"] = " ".join(keep_classes)

    elif "class" in node.attrib:
        del node.attrib["class"]
    if "style" in node.attrib:
        del node.attrib["style"]
    for child in node:
        self.flatten_node(child, stylizer, names, styles, pseudo_styles, psize, item_id)
def strip_accents(self, s):
    """Return ``s`` converted by ``ascii_text``.

    :param s: the text to convert
    """
    stripped = ascii_text(s)
    return stripped
def flatten_node(self, node, stylizer, names, styles, pseudo_styles, psize, item_id):
    """Flatten the computed style of ``node`` into a class and recurse.

    Presentational attributes (``align``, and for ``<font>`` elements
    ``size``/``face``, plus ``color``/``bgcolor``) are converted into
    entries of the node's CSS dictionary and removed from the node. The
    font size is optionally rescaled relative to ``psize``, a minimum
    line height is applied, properties in ``self.filter_css`` are dropped,
    and the resulting declarations are mapped to a class name that is
    written to the node's ``class`` attribute. Any inline ``style``
    attribute is removed. Children are processed recursively.

    :param node: element to process; skipped unless its tag is a string in
        the XHTML namespace
    :param stylizer: provides ``style(node)`` for computed styles
    :param names: mapping of class base name to a counter used for numeric
        suffixes (presumably defaults missing keys to 0 -- TODO confirm)
    :param styles: mapping of CSS text to the class name generated for it
    :param pseudo_styles: mapping of pseudo selector to a mapping of CSS
        text to class name
    :param psize: font size of the parent, used as the base for ``em`` units
    :param item_id: id of the item being processed, compared against
        ``'calibre_jacket'``
    """
    if not isinstance(node.tag, basestring) \
            or namespace(node.tag) != XHTML_NS:
        return
    tag = barename(node.tag)
    style = stylizer.style(node)
    cssdict = style.cssdict()
    try:
        font_size = style['font-size']
    except:
        # Fall back to the configured base size when the lookup fails.
        font_size = self.sbase if self.sbase is not None else \
            self.context.source.fbase
    # align attribute: text-align for everything except <img>, where it
    # maps to vertical-align or float instead.
    if 'align' in node.attrib:
        if tag != 'img':
            cssdict['text-align'] = node.attrib['align']
        else:
            val = node.attrib['align']
            if val in ('middle', 'bottom', 'top'):
                cssdict['vertical-align'] = val
            elif val in ('left', 'right'):
                cssdict['float'] = val
        del node.attrib['align']
    # <font> is replaced by <div> when it contains any of the listed
    # descendants, otherwise by <span>.
    if node.tag == XHTML('font'):
        tags = [
            'descendant::h:%s' % x
            for x in ('p', 'div', 'table', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
                      'ol', 'ul', 'dl', 'blockquote')
        ]
        tag = 'div' if XPath('|'.join(tags))(node) else 'span'
        node.tag = XHTML(tag)
        if 'size' in node.attrib:

            def force_int(raw):
                return int(re.search(r'([0-9+-]+)', raw).group(1))

            size = node.attrib['size'].strip()
            if size:
                fnums = self.context.source.fnums
                if size[0] in ('+', '-'):
                    # Oh, the warcrimes
                    # Relative size: offset from 3, clamped to the range 1..7.
                    try:
                        esize = 3 + force_int(size)
                    except:
                        esize = 3
                    if esize < 1:
                        esize = 1
                    if esize > 7:
                        esize = 7
                    font_size = fnums[esize]
                else:
                    try:
                        font_size = fnums[force_int(size)]
                    except:
                        font_size = fnums[3]
                cssdict['font-size'] = '%.1fpt' % font_size
            del node.attrib['size']
        if 'face' in node.attrib:
            cssdict['font-family'] = node.attrib['face']
            del node.attrib['face']
    # color/bgcolor attributes: values that fail to parse are dropped.
    if 'color' in node.attrib:
        try:
            cssdict['color'] = Property('color', node.attrib['color']).value
        except (ValueError, SyntaxErr):
            pass
        del node.attrib['color']
    if 'bgcolor' in node.attrib:
        try:
            cssdict['background-color'] = Property(
                'background-color', node.attrib['bgcolor']).value
        except (ValueError, SyntaxErr):
            pass
        del node.attrib['bgcolor']
    if cssdict.get('font-weight', '').lower() == 'medium':
        cssdict[
            'font-weight'] = 'normal'  # ADE chokes on font-weight medium

    fsize = font_size
    # A left-floated, childless node with an explicit font-size and a single
    # character of text is treated as a drop cap and is not rescaled.
    is_drop_cap = (cssdict.get('float', None) == 'left' and
                   'font-size' in cssdict and len(node) == 0 and node.text and
                   len(node.text) == 1)
    if not self.context.disable_font_rescaling and not is_drop_cap:
        _sbase = self.sbase if self.sbase is not None else \
            self.context.source.fbase
        dyn_rescale = dynamic_rescale_factor(node)
        if dyn_rescale is not None:
            fsize = self.fmap[_sbase]
            fsize *= dyn_rescale
            cssdict['font-size'] = '%0.5fem' % (fsize / psize)
            # From here on (including the recursion) this node's size is
            # the parent size.
            psize = fsize
        elif 'font-size' in cssdict or tag == 'body':
            fsize = self.fmap[font_size]
            try:
                cssdict['font-size'] = "%0.5fem" % (fsize / psize)
            except ZeroDivisionError:
                # No usable parent size: emit an absolute size instead.
                cssdict['font-size'] = '%.1fpt' % fsize
            psize = fsize

    try:
        minlh = self.context.minimum_line_height / 100.
        if not is_drop_cap and style['line-height'] < minlh * fsize:
            cssdict['line-height'] = str(minlh)
    except:
        self.oeb.logger.exception('Failed to set minimum line-height')

    if cssdict:
        for x in self.filter_css:
            cssdict.pop(x, None)

    if cssdict:
        if self.lineh and self.fbase and tag != 'body':
            self.clean_edges(cssdict, style, psize)
        if 'display' in cssdict and cssdict['display'] == 'in-line':
            cssdict['display'] = 'inline'
        if self.unfloat and 'float' in cssdict \
                and cssdict.get('display', 'none') != 'none':
            del cssdict['display']
        if self.untable and 'display' in cssdict \
                and cssdict['display'].startswith('table'):
            display = cssdict['display']
            if display == 'table-cell':
                cssdict['display'] = 'inline'
            else:
                cssdict['display'] = 'block'
        if 'vertical-align' in cssdict \
                and cssdict['vertical-align'] == 'sup':
            cssdict['vertical-align'] = 'super'
    if self.lineh and 'line-height' not in cssdict:
        lineh = self.lineh / psize
        cssdict['line-height'] = "%0.5fem" % lineh

    if (self.context.remove_paragraph_spacing or self.context.insert_blank_line) and tag in ('p', 'div'):
        if item_id != 'calibre_jacket' or self.context.output_profile.name == 'Kindle':
            for prop in ('margin', 'padding', 'border'):
                for edge in ('top', 'bottom'):
                    cssdict['%s-%s' % (prop, edge)] = '0pt'
        if self.context.insert_blank_line:
            cssdict['margin-top'] = cssdict['margin-bottom'] = \
                '%fem'%self.context.insert_blank_line_size
        indent_size = self.context.remove_paragraph_spacing_indent_size
        # A negative indent size means existing indents are left alone.
        keep_indents = indent_size < 0.0
        if (self.context.remove_paragraph_spacing and not keep_indents and
                cssdict.get('text-align', None) not in ('center', 'right')):
            cssdict['text-indent'] = "%1.1fem" % indent_size

    pseudo_classes = style.pseudo_classes(self.filter_css)
    if cssdict or pseudo_classes:
        keep_classes = set()

        if cssdict:
            # Identical declaration text reuses the class generated earlier.
            items = cssdict.items()
            items.sort()
            css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
            classes = node.get('class', '').strip() or 'calibre'
            klass = ascii_text(
                STRIPNUM.sub('', classes.split()[0].replace('_', '')))
            if css in styles:
                match = styles[css]
            else:
                match = klass + str(names[klass] or '')
                styles[css] = match
                names[klass] += 1
            node.attrib['class'] = match
            keep_classes.add(match)

        # NOTE: the loop variable below rebinds ``cssdict``.
        for psel, cssdict in pseudo_classes.iteritems():
            items = sorted(cssdict.iteritems())
            css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
            pstyles = pseudo_styles[psel]
            if css in pstyles:
                match = pstyles[css]
            else:
                # We have to use a different class for each psel as
                # otherwise you can have incorrect styles for a situation
                # like: a:hover { color: red } a:link { color: blue } a.x:hover { color: green }
                # If the pcalibre class for a:hover and a:link is the same,
                # then the class attribute for a.x tags will contain both
                # that class and the class for a.x:hover, which is wrong.
                klass = 'pcalibre'
                match = klass + str(names[klass] or '')
                pstyles[css] = match
                names[klass] += 1
            keep_classes.add(match)
        # keep_classes is a set, so the order of the joined names is not fixed.
        node.attrib['class'] = ' '.join(keep_classes)

    elif 'class' in node.attrib:
        del node.attrib['class']
    if 'style' in node.attrib:
        del node.attrib['style']
    for child in node:
        self.flatten_node(child, stylizer, names, styles, pseudo_styles, psize, item_id)
def primary_find(pat, src):
    """Find ``pat`` in ``src`` while ignoring case and accents on letters.

    With ``_icu_not_ok`` set, the ``ascii_text`` forms of both strings are
    searched with ``py_find``; otherwise the search is handed to
    ``primary_icu_find``. The helper's result is returned as-is.
    """
    if _icu_not_ok:
        from calibre.utils.filenames import ascii_text
        plain_pat = ascii_text(pat)
        plain_src = ascii_text(src)
        result = py_find(plain_pat, plain_src)
    else:
        result = primary_icu_find(pat, src)
    return result
def sanitize_bookmark_name(base):
    """Return ``base`` reduced to ASCII letters, digits and underscores.

    The ``ascii_text`` form of ``base`` is used, and every character that is
    not an ASCII letter or digit is replaced by ``'_'``.
    """
    plain = ascii_text(base)
    return re.sub(r'[^0-9a-zA-Z]', '_', plain)
def sanitize_bookmark_name(base):
    """Return a bookmark-safe name of at most 32 characters built from ``base``.

    The ``ascii_text`` form of ``base`` is used, every character that is not
    an ASCII letter or digit is replaced by ``'_'``, the result is cut to 32
    characters, and trailing underscores are then stripped.
    """
    cleaned = re.sub(r'[^0-9a-zA-Z]', '_', ascii_text(base))
    # Max length allowed by Word appears to be 40, we use 32 to leave some
    # space for making the name unique
    shortened = cleaned[:32]
    return shortened.rstrip('_')
def flatten_node(self, node, stylizer, names, styles, pseudo_styles, psize, item_id):
    """Flatten the computed style of ``node`` into a class and recurse.

    Presentational attributes (``align``, and for ``<font>`` elements
    ``size``/``face``, plus ``color``/``bgcolor``) are converted into
    entries of the node's CSS dictionary and removed from the node. The
    font size is optionally rescaled relative to ``psize``, a minimum
    line height is applied, properties in ``self.filter_css`` are dropped,
    and the resulting declarations are mapped to a class name that is
    written to the node's ``class`` attribute. Any inline ``style``
    attribute is removed. Children are processed recursively.

    :param node: element to process; skipped unless its tag is a string in
        the XHTML namespace
    :param stylizer: provides ``style(node)`` for computed styles
    :param names: mapping of class base name to a counter used for numeric
        suffixes (presumably defaults missing keys to 0 -- TODO confirm)
    :param styles: mapping of CSS text to the class name generated for it
    :param pseudo_styles: mapping of pseudo selector to a mapping of CSS
        text to class name
    :param psize: font size of the parent, used as the base for ``em`` units
    :param item_id: id of the item being processed, compared against
        ``'calibre_jacket'``
    """
    if not isinstance(node.tag, basestring) \
       or namespace(node.tag) != XHTML_NS:
        return
    tag = barename(node.tag)
    style = stylizer.style(node)
    cssdict = style.cssdict()
    try:
        font_size = style['font-size']
    except:
        # Fall back to the configured base size when the lookup fails.
        font_size = self.sbase if self.sbase is not None else \
            self.context.source.fbase
    # align attribute: text-align for everything except <img>, where it
    # maps to vertical-align or float instead.
    if 'align' in node.attrib:
        if tag != 'img':
            cssdict['text-align'] = node.attrib['align']
        else:
            val = node.attrib['align']
            if val in ('middle', 'bottom', 'top'):
                cssdict['vertical-align'] = val
            elif val in ('left', 'right'):
                cssdict['float'] = val
        del node.attrib['align']
    # <font> is replaced by <div> when it contains any of the listed
    # descendants, otherwise by <span>.
    if node.tag == XHTML('font'):
        tags = ['descendant::h:%s'%x for x in ('p', 'div', 'table', 'h1',
            'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'dl', 'blockquote')]
        tag = 'div' if XPath('|'.join(tags))(node) else 'span'
        node.tag = XHTML(tag)
        if 'size' in node.attrib:
            def force_int(raw):
                return int(re.search(r'([0-9+-]+)', raw).group(1))
            size = node.attrib['size'].strip()
            if size:
                fnums = self.context.source.fnums
                if size[0] in ('+', '-'):
                    # Oh, the warcrimes
                    # Relative size: offset from 3, clamped to the range 1..7.
                    try:
                        esize = 3 + force_int(size)
                    except:
                        esize = 3
                    if esize < 1:
                        esize = 1
                    if esize > 7:
                        esize = 7
                    font_size = fnums[esize]
                else:
                    try:
                        font_size = fnums[force_int(size)]
                    except:
                        font_size = fnums[3]
                cssdict['font-size'] = '%.1fpt'%font_size
            del node.attrib['size']
        if 'face' in node.attrib:
            cssdict['font-family'] = node.attrib['face']
            del node.attrib['face']
    # color/bgcolor attributes: values that fail to parse are dropped.
    if 'color' in node.attrib:
        try:
            cssdict['color'] = Property('color', node.attrib['color']).value
        except (ValueError, SyntaxErr):
            pass
        del node.attrib['color']
    if 'bgcolor' in node.attrib:
        try:
            cssdict['background-color'] = Property('background-color', node.attrib['bgcolor']).value
        except (ValueError, SyntaxErr):
            pass
        del node.attrib['bgcolor']
    if cssdict.get('font-weight', '').lower() == 'medium':
        cssdict['font-weight'] = 'normal'  # ADE chokes on font-weight medium

    fsize = font_size
    if not self.context.disable_font_rescaling:
        _sbase = self.sbase if self.sbase is not None else \
            self.context.source.fbase
        dyn_rescale = dynamic_rescale_factor(node)
        if dyn_rescale is not None:
            fsize = self.fmap[_sbase]
            fsize *= dyn_rescale
            cssdict['font-size'] = '%0.5fem'%(fsize/psize)
            # From here on (including the recursion) this node's size is
            # the parent size.
            psize = fsize
        elif 'font-size' in cssdict or tag == 'body':
            fsize = self.fmap[font_size]
            try:
                cssdict['font-size'] = "%0.5fem" % (fsize / psize)
            except ZeroDivisionError:
                # No usable parent size: emit an absolute size instead.
                cssdict['font-size'] = '%.1fpt'%fsize
            psize = fsize

    try:
        minlh = self.context.minimum_line_height / 100.
        if style['line-height'] < minlh * fsize:
            cssdict['line-height'] = str(minlh)
    except:
        self.oeb.logger.exception('Failed to set minimum line-height')

    if cssdict:
        for x in self.filter_css:
            cssdict.pop(x, None)

    if cssdict:
        if self.lineh and self.fbase and tag != 'body':
            self.clean_edges(cssdict, style, psize)
        if 'display' in cssdict and cssdict['display'] == 'in-line':
            cssdict['display'] = 'inline'
        if self.unfloat and 'float' in cssdict \
           and cssdict.get('display', 'none') != 'none':
            del cssdict['display']
        if self.untable and 'display' in cssdict \
           and cssdict['display'].startswith('table'):
            display = cssdict['display']
            if display == 'table-cell':
                cssdict['display'] = 'inline'
            else:
                cssdict['display'] = 'block'
        if 'vertical-align' in cssdict \
           and cssdict['vertical-align'] == 'sup':
            cssdict['vertical-align'] = 'super'
    if self.lineh and 'line-height' not in cssdict:
        lineh = self.lineh / psize
        cssdict['line-height'] = "%0.5fem" % lineh

    if (self.context.remove_paragraph_spacing or self.context.insert_blank_line) and tag in ('p', 'div'):
        if item_id != 'calibre_jacket' or self.context.output_profile.name == 'Kindle':
            for prop in ('margin', 'padding', 'border'):
                for edge in ('top', 'bottom'):
                    cssdict['%s-%s'%(prop, edge)] = '0pt'
        if self.context.insert_blank_line:
            cssdict['margin-top'] = cssdict['margin-bottom'] = \
                '%fem'%self.context.insert_blank_line_size
        indent_size = self.context.remove_paragraph_spacing_indent_size
        # A negative indent size means existing indents are left alone.
        keep_indents = indent_size < 0.0
        if (self.context.remove_paragraph_spacing and not keep_indents and
            cssdict.get('text-align', None) not in ('center', 'right')):
            cssdict['text-indent'] = "%1.1fem" % indent_size

    pseudo_classes = style.pseudo_classes(self.filter_css)
    if cssdict or pseudo_classes:
        keep_classes = set()

        if cssdict:
            # Identical declaration text reuses the class generated earlier.
            items = cssdict.items()
            items.sort()
            css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
            classes = node.get('class', '').strip() or 'calibre'
            klass = ascii_text(STRIPNUM.sub('', classes.split()[0].replace('_', '')))
            if css in styles:
                match = styles[css]
            else:
                match = klass + str(names[klass] or '')
                styles[css] = match
                names[klass] += 1
            node.attrib['class'] = match
            keep_classes.add(match)

        # NOTE: the loop variable below rebinds ``cssdict``.
        for psel, cssdict in pseudo_classes.iteritems():
            items = sorted(cssdict.iteritems())
            css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
            pstyles = pseudo_styles[psel]
            if css in pstyles:
                match = pstyles[css]
            else:
                # We have to use a different class for each psel as
                # otherwise you can have incorrect styles for a situation
                # like: a:hover { color: red } a:link { color: blue } a.x:hover { color: green }
                # If the pcalibre class for a:hover and a:link is the same,
                # then the class attribute for a.x tags will contain both
                # that class and the class for a.x:hover, which is wrong.
                klass = 'pcalibre'
                match = klass + str(names[klass] or '')
                pstyles[css] = match
                names[klass] += 1
            keep_classes.add(match)
        # keep_classes is a set, so the order of the joined names is not fixed.
        node.attrib['class'] = ' '.join(keep_classes)

    elif 'class' in node.attrib:
        del node.attrib['class']
    if 'style' in node.attrib:
        del node.attrib['style']
    for child in node:
        self.flatten_node(child, stylizer, names, styles, pseudo_styles, psize, item_id)
def flatten_node(self, node, stylizer, names, styles, pseudo_styles, psize, item_id):
    """Flatten the computed style of ``node`` into a class and recurse.

    Presentational attributes (``align``, ``valign`` on ``td``, and for
    ``<font>`` elements ``size``/``face``, plus ``color``/``bgcolor``) are
    converted into entries of the node's CSS dictionary and removed from
    the node. The font size is optionally rescaled relative to ``psize``,
    a minimum line height is applied, properties in ``self.filter_css``
    are dropped, and the resulting declarations are mapped to a class
    name that is written to the node's ``class`` attribute. Any inline
    ``style`` attribute is removed. Children are processed recursively.

    :param node: element to process; skipped unless its tag is a string in
        the XHTML namespace
    :param stylizer: provides ``style(node)`` for computed styles; its
        ``body_font_size`` attribute is set when a numeric size is found
        on ``body``
    :param names: mapping of class base name to a counter used for numeric
        suffixes (presumably defaults missing keys to 0 -- TODO confirm)
    :param styles: mapping of CSS text to the class name generated for it
    :param pseudo_styles: mapping of pseudo selector to a mapping of CSS
        text to class name
    :param psize: font size of the parent, used as the base for ``em`` units
    :param item_id: id of the item being processed, compared against
        ``'calibre_jacket'``
    """
    if not isinstance(node.tag, string_or_bytes) \
       or namespace(node.tag) != XHTML_NS:
        return
    tag = barename(node.tag)
    style = stylizer.style(node)
    cssdict = style.cssdict()
    try:
        font_size = style['font-size']
    except:
        # Fall back to the configured base size when the lookup fails.
        font_size = self.sbase if self.sbase is not None else \
            self.context.source.fbase
    if tag == 'body' and isinstance(font_size, numbers.Number):
        stylizer.body_font_size = font_size
    # align attribute: text-align for everything except <img>, where it
    # maps to vertical-align or float instead.
    if 'align' in node.attrib:
        if tag != 'img':
            cssdict['text-align'] = node.attrib['align']
            if cssdict['text-align'] == 'center':
                # align=center causes tables to be center aligned,
                # which text-align does not. And the ever trustworthy Word
                # uses this construct in its HTML output. See
                # https://bugs.launchpad.net/bugs/1569583
                if tag == 'table':
                    if 'margin-left' not in cssdict and 'margin-right' not in cssdict:
                        cssdict['margin-left'] = cssdict['margin-right'] = 'auto'
                else:
                    for table in node.iterchildren(XHTML("table")):
                        ts = stylizer.style(table)
                        if ts.get('margin-left') is None and ts.get('margin-right') is None:
                            ts.set('margin-left', 'auto')
                            ts.set('margin-right', 'auto')
        else:
            val = node.attrib['align']
            if val in ('middle', 'bottom', 'top'):
                cssdict['vertical-align'] = val
            elif val in ('left', 'right'):
                cssdict['float'] = val
        del node.attrib['align']
    # valign on <td> is only applied when the computed vertical-align is
    # 'inherit'; the attribute is removed either way.
    if 'valign' in node.attrib and tag == 'td':
        if cssdict.get('vertical-align') == 'inherit':
            cssdict['vertical-align'] = node.attrib['valign']
        del node.attrib['valign']
    # <font> is replaced by <div> when it contains any of the listed
    # descendants, otherwise by <span>.
    if node.tag == XHTML('font'):
        tags = ['descendant::h:%s'%x for x in ('p', 'div', 'table', 'h1',
            'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'dl', 'blockquote')]
        tag = 'div' if XPath('|'.join(tags))(node) else 'span'
        node.tag = XHTML(tag)
        if 'size' in node.attrib:
            def force_int(raw):
                return int(re.search(r'([0-9+-]+)', raw).group(1))
            size = node.attrib['size'].strip()
            if size:
                fnums = self.context.source.fnums
                if size[0] in ('+', '-'):
                    # Oh, the warcrimes
                    # Relative size: offset from 3, clamped to the range 1..7.
                    try:
                        esize = 3 + force_int(size)
                    except:
                        esize = 3
                    if esize < 1:
                        esize = 1
                    if esize > 7:
                        esize = 7
                    font_size = fnums[esize]
                else:
                    try:
                        font_size = fnums[force_int(size)]
                    except:
                        font_size = fnums[3]
                cssdict['font-size'] = '%.1fpt'%font_size
            del node.attrib['size']
        if 'face' in node.attrib:
            cssdict['font-family'] = node.attrib['face']
            del node.attrib['face']
    # color/bgcolor attributes: values that fail to parse are dropped.
    if 'color' in node.attrib:
        try:
            cssdict['color'] = Property('color', node.attrib['color']).value
        except (ValueError, SyntaxErr):
            pass
        del node.attrib['color']
    if 'bgcolor' in node.attrib:
        try:
            cssdict['background-color'] = Property('background-color', node.attrib['bgcolor']).value
        except (ValueError, SyntaxErr):
            pass
        del node.attrib['bgcolor']
    if tag == 'ol' and 'type' in node.attrib:
        del node.attrib['type']
    if cssdict.get('font-weight', '').lower() == 'medium':
        cssdict['font-weight'] = 'normal'  # ADE chokes on font-weight medium

    fsize = font_size
    # A left-floated, childless node with an explicit font-size counts as a
    # drop cap when its text is one character, or two characters whose first
    # code point lies in 0x2000-0x206f. Drop caps are not rescaled.
    is_drop_cap = (cssdict.get('float', None) == 'left' and 'font-size' in cssdict and len(node) == 0 and node.text and (
        len(node.text) == 1 or (len(node.text) == 2 and 0x2000 <= ord(node.text[0]) <= 0x206f)))
    # Detect drop caps generated by the docx input plugin
    if node.tag and node.tag.endswith('}p') and len(node) == 0 and node.text and len(node.text.strip()) == 1 and \
            not node.tail and 'line-height' in cssdict and 'font-size' in cssdict:
        dp = node.getparent()
        if dp.tag and dp.tag.endswith('}div') and len(dp) == 1 and not dp.text:
            if stylizer.style(dp).cssdict().get('float', None) == 'left':
                is_drop_cap = True
    if not self.context.disable_font_rescaling and not is_drop_cap:
        _sbase = self.sbase if self.sbase is not None else \
            self.context.source.fbase
        dyn_rescale = dynamic_rescale_factor(node)
        if dyn_rescale is not None:
            fsize = self.fmap[_sbase]
            fsize *= dyn_rescale
            cssdict['font-size'] = '%0.5fem'%(fsize/psize)
            # From here on (including the recursion) this node's size is
            # the parent size.
            psize = fsize
        elif 'font-size' in cssdict or tag == 'body':
            fsize = self.fmap[font_size]
            try:
                cssdict['font-size'] = "%0.5fem" % (fsize / psize)
            except ZeroDivisionError:
                # No usable parent size: emit an absolute size instead.
                cssdict['font-size'] = '%.1fpt'%fsize
            psize = fsize

    try:
        minlh = self.context.minimum_line_height / 100.
        if not is_drop_cap and style['line-height'] < minlh * fsize:
            cssdict['line-height'] = str(minlh)
    except:
        self.oeb.logger.exception('Failed to set minimum line-height')

    if cssdict:
        for x in self.filter_css:
            popval = cssdict.pop(x, None)
            # A filtered font-family is restored when its first entry
            # (with its first and last characters removed) equals that of
            # self.body_font_family.
            if self.body_font_family and popval and x == 'font-family' \
                    and popval.partition(',')[0][1:-1] == self.body_font_family.partition(',')[0][1:-1]:
                cssdict[x] = popval

    if cssdict:
        if self.lineh and self.fbase and tag != 'body':
            self.clean_edges(cssdict, style, psize)
        if 'display' in cssdict and cssdict['display'] == 'in-line':
            cssdict['display'] = 'inline'
        if self.unfloat and 'float' in cssdict \
           and cssdict.get('display', 'none') != 'none':
            del cssdict['display']
        if self.untable and 'display' in cssdict \
           and cssdict['display'].startswith('table'):
            display = cssdict['display']
            if display == 'table-cell':
                cssdict['display'] = 'inline'
            else:
                cssdict['display'] = 'block'
        if 'vertical-align' in cssdict \
           and cssdict['vertical-align'] == 'sup':
            cssdict['vertical-align'] = 'super'
    if self.lineh and 'line-height' not in cssdict:
        lineh = self.lineh / psize
        cssdict['line-height'] = "%0.5fem" % lineh

    if (self.context.remove_paragraph_spacing or self.context.insert_blank_line) and tag in ('p', 'div'):
        if item_id != 'calibre_jacket' or self.context.output_profile.name == 'Kindle':
            for prop in ('margin', 'padding', 'border'):
                for edge in ('top', 'bottom'):
                    cssdict['%s-%s'%(prop, edge)] = '0pt'
        if self.context.insert_blank_line:
            cssdict['margin-top'] = cssdict['margin-bottom'] = \
                '%fem'%self.context.insert_blank_line_size
        indent_size = self.context.remove_paragraph_spacing_indent_size
        # A negative indent size means existing indents are left alone.
        keep_indents = indent_size < 0.0
        if (self.context.remove_paragraph_spacing and not keep_indents and cssdict.get('text-align', None) not in ('center', 'right')):
            cssdict['text-indent'] = "%1.1fem" % indent_size

    pseudo_classes = style.pseudo_classes(self.filter_css)
    if cssdict or pseudo_classes:
        keep_classes = set()

        if cssdict:
            # Identical declaration text reuses the class generated earlier.
            items = sorted(iteritems(cssdict))
            css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
            classes = node.get('class', '').strip() or 'calibre'
            # lower() because otherwise if the document uses the same class
            # name with different case, both cases will apply, leading
            # to incorrect results.
            klass = ascii_text(STRIPNUM.sub('', classes.split()[0])).lower().strip().replace(' ', '_')
            if css in styles:
                match = styles[css]
            else:
                match = klass + str(names[klass] or '')
                styles[css] = match
                names[klass] += 1
            node.attrib['class'] = match
            keep_classes.add(match)

        # NOTE: the loop variable below rebinds ``cssdict``.
        for psel, cssdict in iteritems(pseudo_classes):
            items = sorted(iteritems(cssdict))
            css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
            pstyles = pseudo_styles[psel]
            if css in pstyles:
                match = pstyles[css]
            else:
                # We have to use a different class for each psel as
                # otherwise you can have incorrect styles for a situation
                # like: a:hover { color: red } a:link { color: blue } a.x:hover { color: green }
                # If the pcalibre class for a:hover and a:link is the same,
                # then the class attribute for a.x tags will contain both
                # that class and the class for a.x:hover, which is wrong.
                klass = 'pcalibre'
                match = klass + str(names[klass] or '')
                pstyles[css] = match
                names[klass] += 1
            keep_classes.add(match)
        # keep_classes is a set, so the order of the joined names is not fixed.
        node.attrib['class'] = ' '.join(keep_classes)

    elif 'class' in node.attrib:
        del node.attrib['class']
    if 'style' in node.attrib:
        del node.attrib['style']
    for child in node:
        self.flatten_node(child, stylizer, names, styles, pseudo_styles, psize, item_id)
def initialise_new_file(self, pathtoebook):
    """Reset the dialog state and load ``pathtoebook`` as the new source ebook.

    Per-book state (metadata, errors, rename map, scrambled flag, dummy
    images) is cleared, the run button is enabled and the Save button is
    disabled. If the path is a file that ``get_container`` can open, a
    working clone is created, output names are derived and the original
    metadata and check results are recorded. Otherwise a message is logged
    (and, for an unreadable file, an error dialog is shown). The log view
    is refreshed in every case.

    :param pathtoebook: path of the ebook file selected by the user
    """
    # Reset all state left over from a previously loaded book.
    self.meta, self.errors = {}, {}
    self.rename_file_map = {}
    self.is_scrambled = False
    self.dummyimg = None
    self.dummysvg = ''
    self.runButton.setEnabled(True)
    self.buttonBox.button(QDialogButtonBox.Save).setEnabled(False)

    fileok = True
    if not os.path.isfile(pathtoebook):
        fileok = False
    else:
        try:
            self.ebook = get_container(pathtoebook)
        except:
            # Any failure to open the container is reported to the user.
            fileok = False
            msg = "Source ebook must be de-DRM'd and in one of these formats:" \
                "\n- azw3\n- epub\n- kepub\n- kepub.epub.\n\nPlease select another."
            error_dialog(self, CAPTION, msg, show=True, show_copy_button=True)

    if not fileok:
        self.log.append('No ebook selected yet')
    else:
        # Register the container root and the clone directory for cleanup.
        self.cleanup_dirs.append(self.ebook.root)
        tdir = PersistentTemporaryDirectory('_scramble_clone_orig')
        self.cleanup_dirs.append(tdir)
        self.eborig = clone_container(self.ebook, tdir)

        dirn, fname, ext, is_kepub_epub = get_fileparts(
            self.ebook.path_to_ebook)
        ext = ext.lower()
        format = 'kepub' if is_kepub_epub else ext

        # NOTE(review): the nesting of the statements after the next two
        # ``if`` blocks was reconstructed from whitespace-collapsed source;
        # confirm against the original file.
        if self.book_id is not None:
            # calibre library book
            self.cleanup_files.append(self.ebook.path_to_ebook)
        sourcepath = self.ebook.path_to_ebook

        self.dummyimg = get_resources('images/' + format + '.png')
        self.dummysvg = get_resources('images/' + format + '.svg')

        if self.from_calibre:
            # calibre plugin
            self.dirout = ''
        else:
            # standalone version
            self.dirout = dirn
            self.log.append('\n--- New ebook: %s' % sourcepath)

        # Output file name: <name>_scrambled.<ext>, or
        # <name>_scrambled.kepub.<ext> for kepub.epub sources.
        fn = fname + '_scrambled.'
        fn += 'kepub.' + ext if is_kepub_epub else ext
        self.fname_scrambled_ebook = ascii_text(fn)
        self.sourcefile.setText(sourcepath)
        self.savefile.setText(self.fname_scrambled_ebook)
        self.meta['orig'] = get_metadata(self.ebook)
        self.errors['orig'] = get_run_check_error(self.ebook)

    self.viewlog()