def test_character_name(self):
    """Test character naming

    For every sample character, the name reported by ``icu.character_name``
    and the name looked up from the first value of ``icu.ord_string`` must
    both equal the expected name.
    """
    from calibre.utils.unicode_names import character_name_from_code
    expected_names = {'\U0001f431': 'CAT FACE'}
    for char, expected in expected_names.items():
        # Name as reported directly by the icu module.
        icu_name = icu.character_name(char)
        self.ae(icu_name, expected)
        # Name looked up from the first value ord_string() yields.
        first_code = icu.ord_string(char)[0]
        self.ae(character_name_from_code(first_code), expected)
def add_chars(text, counter, file_name):
    """Tally the characters of ``text`` into ``counter`` for ``file_name``.

    Falsy ``text`` is ignored. Bytes are decoded as UTF-8, dropping
    undecodable sequences. ``counter`` is updated with the per-item counts
    of ``ord_string(text)`` and ``file_name`` is added to
    ``counter.chars[item]`` for every distinct item seen.
    """
    if not text:
        return
    decoded = text.decode('utf-8', 'ignore') if isinstance(text, bytes) else text
    seen = Counter(ord_string(decoded))
    counter.update(seen)
    for code in seen:
        # Remember which file each distinct value occurred in.
        counter.chars[code].add(file_name)
def get_element_font_usage(self, elem, resolve_property, resolve_pseudo_property, font_face_rules, do_embed, font_usage_map, font_spec):
    """Record which characters of ``elem`` are rendered with which fonts.

    The element's own text, and the text selected by its ``first-letter``
    and ``first-line`` pseudo-elements (when those resolve a font-family),
    are each converted to a set of values via ``ord_string`` (minus
    ``exclude_chars``) and recorded against:

    * ``self.font_stats[rule['src']]`` for every ``@font-face`` rule in
      ``font_face_rules`` matching the resolved font, and
    * when ``do_embed`` is true, ``font_usage_map`` (keyed by the font's
      properties) and ``font_spec`` (the family names that are not in
      ``bad_fonts``).

    Returns ``None``; all results are written into the passed-in
    containers and ``self.font_stats``.
    """
    text = get_element_text(elem, resolve_property, resolve_pseudo_property, self.capitalize_pat)
    if not text:
        return

    def update_usage_for_embed(font, chars):
        # Only relevant when fonts are going to be embedded.
        if not do_embed:
            return
        ff = [icu_lower(x) for x in font.get('font-family', ())]
        if ff and ff[0] not in bad_fonts:
            # The key is built from the font properties, with the family
            # reduced to its lower-cased first entry.
            key = frozenset(
                (k, ff[0] if k == 'font-family' else v)
                for k, v in font.items() if k in font_keys)
            val = font_usage_map.get(key)
            if val is None:
                val = font_usage_map[key] = {'text': set()}
                for k in font_keys:
                    val[k] = font[k][0] if k == 'font-family' else font[k]
            val['text'] |= chars
        for family in font.get('font-family', ()):
            if family and icu_lower(family) not in bad_fonts:
                font_spec.add(family)

    def record_usage(font, chars):
        # Shared by the element itself and both pseudo-elements.
        update_usage_for_embed(font, chars)
        for rule in get_matching_rules(font_face_rules, font):
            self.font_stats[rule['src']] |= chars

    # The element's own text.
    font = get_font_dict(elem, resolve_property)
    record_usage(font, frozenset(ord_string(text)) - exclude_chars)

    # ::first-letter, only if a font-family is resolved for it.
    q = resolve_pseudo_property(elem, 'first-letter', 'font-family', abort_on_missing=True)
    if q is not None:
        font = get_font_dict(elem, resolve_pseudo_property, pseudo='first-letter')
        text = get_element_text(elem, resolve_property, resolve_pseudo_property, self.capitalize_pat, for_pseudo='first-letter')
        m = self.first_letter_pat.search(text.lstrip())
        if m is not None:
            record_usage(font, frozenset(ord_string(m.group())) - exclude_chars)

    # ::first-line, only if a font-family is resolved for it.
    q = resolve_pseudo_property(elem, 'first-line', 'font-family', abort_on_missing=True)
    if q is not None:
        # Resolve the font of the first-line pseudo-element itself; this
        # previously asked for 'first-letter', crediting the first-line
        # text to the wrong font.
        font = get_font_dict(elem, resolve_pseudo_property, pseudo='first-line')
        text = get_element_text(elem, resolve_property, resolve_pseudo_property, self.capitalize_pat, for_pseudo='first-line')
        record_usage(font, frozenset(ord_string(text)) - exclude_chars)
def html_entities():
    """Return a dict mapping lower-cased HTML5 entity names to sets of values.

    Each value is the first item of ``ord_string`` applied to the entity's
    replacement text; entries for which that raises ``TypeError`` are
    skipped. ``0x202F`` is additionally registered under ``'nnbsp'``. The
    result is built once and cached on the function object as
    ``html_entities.ans``.
    """
    cached = getattr(html_entities, 'ans', None)
    if cached is not None:
        return cached

    from calibre.ebooks.html_entities import html5_entities
    by_name = defaultdict(set)
    for name, char in iteritems(html5_entities):
        try:
            by_name[name.lower()].add(ord_string(char)[0])
        except TypeError:
            continue
    by_name['nnbsp'].add(0x202F)
    # Store a plain dict so later lookups of unknown names do not create
    # empty entries.
    html_entities.ans = result = dict(by_name)
    return result
def update_position(self, line=None, col=None, character=None):
    """Refresh the label ``self.la`` with the cursor position.

    With ``line`` of ``None`` the label is cleared. Otherwise it shows the
    translated line/column text, prefixed by the name of ``character``
    when the ``editor_show_char_under_cursor`` preference is on and a name
    is available (a tab is reported as ``TAB``).
    """
    if line is None:
        self.la.setText('')
        return

    char_name = None
    try:
        if character and tprefs['editor_show_char_under_cursor']:
            char_name = character_name_from_code(ord_string(character)[0])
    except Exception:
        # Name lookup is best effort; fall back to no name.
        char_name = None

    label = _('Line: {0} : {1}').format(line, col)
    if not char_name:
        char_name = {'\t':'TAB'}.get(character, None)
    if char_name and tprefs['editor_show_char_under_cursor']:
        label = char_name + ' : ' + label
    self.la.setText(label)
def chars_data(container, *args):
    """Yield one ``Char`` record per distinct character used in the spine.

    Every spine item of ``container`` whose mime type is in ``OEB_DOCS``
    is read with ``container.raw_data`` and passed through ``ord_string``;
    the resulting values are counted and the names of the files they occur
    in are collected.

    Each yielded record is ``Char(index, safe_chr(value), value, files,
    count)`` where ``files`` is sorted by spine position (names not in the
    spine last) and then by ``numeric_sort_key``, and ``count`` is the
    total number of occurrences. Extra positional arguments are accepted
    and ignored.
    """
    files_by_char = defaultdict(set)
    totals = Counter()
    for name, is_linear in container.spine_names:
        if container.mime_map.get(name) not in OEB_DOCS:
            continue
        counts = Counter(ord_string(container.raw_data(name)))
        totals.update(counts)
        for codepoint in counts:
            files_by_char[codepoint].add(name)

    spine_index = {n: i for i, (n, l) in enumerate(container.spine_names)}

    def sort_key(name):
        # Names missing from the spine sort after all spine items.
        return spine_index.get(name, len(spine_index)), numeric_sort_key(name)

    # items() instead of iteritems(): the latter does not exist on
    # Python 3 dictionaries.
    for i, (codepoint, usage) in enumerate(files_by_char.items()):
        yield Char(i, safe_chr(codepoint), codepoint, sorted(usage, key=sort_key), totals[codepoint])
def chars_data(container, book_locale):
    """Yield one ``Char`` record per distinct character used in the spine.

    Every spine item of ``container`` whose mime type is in ``OEB_DOCS``
    is read with ``container.raw_data`` and passed through ``ord_string``;
    the resulting values are counted and the names of the files they occur
    in are collected.

    Each yielded record is ``Char(index, safe_chr(value), value, files,
    count)`` where ``files`` is sorted by spine position (names not in the
    spine last) and then by ``numeric_sort_key``, and ``count`` is the
    total number of occurrences. ``book_locale`` is accepted but not used
    here.
    """
    files_by_char = defaultdict(set)
    totals = Counter()
    for name, is_linear in container.spine_names:
        if container.mime_map.get(name) not in OEB_DOCS:
            continue
        counts = Counter(ord_string(container.raw_data(name)))
        totals.update(counts)
        for codepoint in counts:
            files_by_char[codepoint].add(name)

    spine_index = {n: i for i, (n, l) in enumerate(container.spine_names)}

    def sort_key(name):
        # Names missing from the spine sort after all spine items.
        return spine_index.get(name, len(spine_index)), numeric_sort_key(name)

    # items() instead of iteritems(): the latter does not exist on
    # Python 3 dictionaries.
    for i, (codepoint, usage) in enumerate(files_by_char.items()):
        yield Char(i, safe_chr(codepoint), codepoint, sorted(usage, key=sort_key), totals[codepoint])
def map_symbol_text(text, font):
    """Translate ``text`` through the symbol map registered for ``font``.

    ``font`` must be a key of ``SYMBOL_MAPS``. Bytes input is decoded as
    UTF-8 first. The pieces produced by ``do_map`` over
    ``ord_string(text)`` are joined into a single string.
    """
    symbol_map = SYMBOL_MAPS[font]
    decoded = text.decode('utf-8') if isinstance(text, bytes) else text
    mapped = do_map(symbol_map, ord_string(decoded))
    return ''.join(mapped)
def map_symbol_text(text, font):
    """Translate ``text`` through the symbol map registered for ``font``.

    ``font`` must be a key of ``SYMBOL_MAPS``. Bytes input is decoded as
    UTF-8 before being handed to ``ord_string``, so callers may pass either
    text or UTF-8 encoded bytes. The pieces produced by ``do_map`` are
    joined into a single string.
    """
    m = SYMBOL_MAPS[font]
    if isinstance(text, bytes):
        # Work on text rather than on raw bytes.
        text = text.decode('utf-8')
    return ''.join(do_map(m, ord_string(text)))
def safe_ord(x):
    """Return the first value ``ord_string`` yields for ``unicode_type(x)``."""
    as_text = unicode_type(x)
    ordinals = ord_string(as_text)
    return ordinals[0]
ff = resolve_property(elem, pseudo, 'font-family') ans['font-family'] = tuple(x.value for x in ff) for p in 'weight', 'style', 'stretch': p = 'font-' + p rp = resolve_property(elem, p) if pseudo is None else resolve_property( elem, pseudo, p) ans[p] = str(rp[0].value) normalize_font_properties(ans) return ans bad_fonts = { 'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit' } exclude_chars = frozenset(ord_string('\n\r\t')) skip_tags = {XHTML(x) for x in 'script style title meta link'.split()} font_keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'} def prepare_font_rule(cssdict): cssdict['font-family'] = frozenset(cssdict['font-family'][:1]) cssdict['width'] = widths[cssdict['font-stretch']] cssdict['weight'] = int(cssdict['font-weight']) class StatsCollector: first_letter_pat = capitalize_pat = None def __init__(self, container, do_embed=False):
def get_font_dict(elem, resolve_property, pseudo=None):
    """Collect the resolved font properties of ``elem`` into a dict.

    ``resolve_property`` is called as ``resolve_property(elem, name)`` when
    ``pseudo`` is ``None`` and as ``resolve_property(elem, pseudo, name)``
    otherwise. The result holds ``'font-family'`` as a tuple of the
    ``.value`` of each resolved item, and ``'font-weight'``,
    ``'font-style'`` and ``'font-stretch'`` as the text form of the first
    resolved item's ``.value``. The dict is passed through
    ``normalize_font_properties`` before being returned.
    """
    ans = {}
    if pseudo is None:
        ff = resolve_property(elem, 'font-family')
    else:
        ff = resolve_property(elem, pseudo, 'font-family')
    ans['font-family'] = tuple(x.value for x in ff)
    for p in 'weight', 'style', 'stretch':
        p = 'font-' + p
        rp = resolve_property(elem, p) if pseudo is None else resolve_property(elem, pseudo, p)
        # type('') is the native text type of the running interpreter.
        ans[p] = type('')(rp[0].value)
    normalize_font_properties(ans)
    return ans


# Family names treated specially by font handling -- presumably generic
# families and keywords rather than real fonts; verify against the users
# of this set.
bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
# ord_string() values for newline, carriage return and tab.
exclude_chars = frozenset(ord_string('\n\r\t'))
# XHTML() names of the script, style, title, meta and link tags.
skip_tags = {XHTML(x) for x in 'script style title meta link'.split()}
# Names of the font properties stored by get_font_dict().
font_keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'}


def prepare_font_rule(cssdict):
    """Normalize a font rule dict in place.

    ``'font-family'`` is replaced by a frozenset of at most its first
    entry, ``'width'`` is set from ``widths`` using ``'font-stretch'`` and
    ``'weight'`` is set to ``'font-weight'`` converted to ``int``.
    """
    cssdict['font-family'] = frozenset(cssdict['font-family'][:1])
    cssdict['width'] = widths[cssdict['font-stretch']]
    cssdict['weight'] = int(cssdict['font-weight'])


class StatsCollector(object):

    # Class-level slots for patterns; first_letter_pat is filled in on
    # first instantiation (see __init__).
    first_letter_pat = capitalize_pat = None

    def __init__(self, container, do_embed=False):
        if self.first_letter_pat is None:
            StatsCollector.first_letter_pat = self.first_letter_pat = regex.compile(
def safe_ord(x):
    """Return the first value ``ord_string`` yields for ``str(x)``."""
    as_text = str(x)
    ordinals = ord_string(as_text)
    return ordinals[0]