示例#1
0
文件: utils.py 项目: kba/calibre
def parse_css(data,
              fname='<string>',
              is_declaration=False,
              decode=None,
              log_level=None,
              css_preprocessor=None):
    """Parse CSS with cssutils and return the resulting cssutils object.

    :param data: CSS source, either text or bytes.
    :param fname: passed to the parser as ``href`` when a full stylesheet
        is parsed.
    :param is_declaration: when true, ``data`` is parsed with
        ``parseStyle`` (a bare declaration block); otherwise it is parsed
        with ``parseString`` as a whole stylesheet.
    :param decode: optional callable used to turn bytes into text; when it
        is None bytes are decoded as UTF-8.
    :param log_level: level applied to the cssutils log and to the parser;
        defaults to ``logging.WARNING``.
    :param css_preprocessor: optional callable applied to the text before
        it is parsed.

    Note that the cssutils ``log`` object is reconfigured on every call
    (level set, ``raiseExceptions`` switched off).
    """
    if log_level is None:
        import logging
        log_level = logging.WARNING
    from cssutils import CSSParser, log
    from calibre.ebooks.oeb.base import _css_logger

    log.setLevel(log_level)
    log.raiseExceptions = False

    if isinstance(data, bytes):
        if decode is None:
            data = data.decode('utf-8')
        else:
            data = decode(data)
    if css_preprocessor is not None:
        data = css_preprocessor(data)

    def skip_import(x):
        # We don't care about @import rules, so never fetch anything
        return (None, None)

    css_parser = CSSParser(loglevel=log_level, fetcher=skip_import,
                           log=_css_logger)
    if is_declaration:
        return css_parser.parseStyle(data, validate=False)
    return css_parser.parseString(data, href=fname, validate=False)
示例#2
0
def normalize_filter_css(props):
    """Return the set of property names in ``props`` plus their expansions.

    Every name in ``props`` is kept. A name that has both an entry in
    ``normalizers`` and a default in ``SHORTHAND_DEFAULTS`` is additionally
    expanded: a ``name: default`` declaration is parsed and handed to the
    normalizer, and whatever the normalizer yields when iterated is added
    to the result.
    """
    import logging
    expanded = set()
    parser = CSSParser(loglevel=logging.CRITICAL, validate=False)
    for name in props:
        normalizer = normalizers.get(name, None)
        expanded.add(name)
        if normalizer is None or name not in SHORTHAND_DEFAULTS:
            continue
        declaration = parser.parseStyle(
            '%s: %s' % (name, SHORTHAND_DEFAULTS[name]))
        value = declaration.getPropertyCSSValue(declaration.item(0))
        expanded.update(set(normalizer(name, value)))
    return expanded
示例#3
0
def normalize_filter_css(props):
    """Expand shorthand property names in ``props`` and return them as a set.

    The result always contains every entry of ``props``. For an entry that
    is known to both ``normalizers`` and ``SHORTHAND_DEFAULTS``, its default
    value is parsed as a declaration and passed to the normalizer; the
    items obtained by iterating the normalizer's result are merged in.
    """
    import logging
    result = set()
    css_parser = CSSParser(loglevel=logging.CRITICAL, validate=False)
    for prop_name in props:
        normalize = normalizers.get(prop_name, None)
        result.add(prop_name)
        if normalize is None:
            continue
        if prop_name not in SHORTHAND_DEFAULTS:
            continue
        css_text = '%s: %s' % (prop_name, SHORTHAND_DEFAULTS[prop_name])
        decl = css_parser.parseStyle(css_text)
        first_value = decl.getPropertyCSSValue(decl.item(0))
        result.update(set(normalize(prop_name, first_value)))
    return result
示例#4
0
 def parse_css(self, data, fname='<string>', is_declaration=False):
     """Parse CSS with cssutils and return the parsed object.

     Bytes are first converted with ``self.decode``. Unless
     ``self.tweak_mode`` is set, the text is run through
     ``self.css_preprocessor`` before parsing. With ``is_declaration``
     true the input is parsed as a bare declaration block, otherwise as a
     stylesheet whose ``href`` is ``fname``.

     The cssutils ``log`` object is reconfigured on every call.
     """
     from cssutils import CSSParser, log
     level = logging.WARNING
     log.setLevel(level)
     log.raiseExceptions = False

     if isinstance(data, bytes):
         data = self.decode(data)
     if not self.tweak_mode:
         data = self.css_preprocessor(data)

     def skip_import(x):
         # We don't care about @import rules, so never fetch anything
         return (None, None)

     css_parser = CSSParser(loglevel=level, fetcher=skip_import,
                            log=_css_logger)
     if is_declaration:
         return css_parser.parseStyle(data, validate=False)
     return css_parser.parseString(data, href=fname, validate=False)
示例#5
0
 def parse_css(self, data, fname='<string>', is_declaration=False):
     """Return the cssutils parse result for ``data``.

     ``data`` may be bytes, in which case ``self.decode`` converts it to
     text. ``self.css_preprocessor`` is applied unless ``self.tweak_mode``
     is true. ``is_declaration`` selects between parsing a bare
     declaration block and parsing a full stylesheet (``fname`` is then
     used as the sheet's ``href``).

     Each call also sets the level of the cssutils ``log`` object and
     turns off its ``raiseExceptions`` flag.
     """
     from cssutils import CSSParser, log
     warning_level = logging.WARNING
     log.setLevel(warning_level)
     log.raiseExceptions = False

     text = data
     if isinstance(text, bytes):
         text = self.decode(text)
     if not self.tweak_mode:
         text = self.css_preprocessor(text)

     def no_fetch(x):
         # We don't care about @import rules
         return (None, None)

     parser = CSSParser(loglevel=warning_level, fetcher=no_fetch,
                        log=_css_logger)
     if not is_declaration:
         return parser.parseString(text, href=fname, validate=False)
     return parser.parseStyle(text, validate=False)
示例#6
0
def parse_css(data, fname='<string>', is_declaration=False, decode=None, log_level=None, css_preprocessor=None):
    """Return the cssutils parse result for ``data``.

    ``data`` is text or bytes; bytes go through ``decode`` when one is
    given and are otherwise decoded as UTF-8. ``css_preprocessor``, when
    given, is applied to the text before parsing. ``is_declaration``
    chooses between parsing a bare declaration block and parsing a whole
    stylesheet, for which ``fname`` becomes the ``href``. ``log_level``
    defaults to ``logging.WARNING`` and is applied both to the cssutils
    ``log`` object and to the parser.
    """
    if log_level is None:
        import logging
        log_level = logging.WARNING
    from cssutils import CSSParser, log
    from calibre.ebooks.oeb.base import _css_logger

    log.setLevel(log_level)
    log.raiseExceptions = False

    text = data
    if isinstance(text, bytes):
        text = decode(text) if decode is not None else text.decode('utf-8')
    if css_preprocessor is not None:
        text = css_preprocessor(text)

    def no_fetch(x):
        # We don't care about @import rules
        return (None, None)

    parser = CSSParser(loglevel=log_level, fetcher=no_fetch, log=_css_logger)
    if not is_declaration:
        return parser.parseString(text, href=fname, validate=False)
    return parser.parseStyle(text, validate=False)
示例#7
0
class StatsCollector(object):
    """Load every spine item of a container into a QWebView and gather font data.

    All the work is started from the constructor, which runs a local
    QEventLoop until every item in ``container.spine_items`` has been
    processed or a failure occurred. Results are left on the instance:

    * ``font_stats``: font file name -> set of characters using that font
    * ``font_rule_map``: item name -> list of accepted @font-face rules
    * ``all_font_rules``: font file name -> its @font-face rule
    * ``font_usage_map``: item name -> per-face usage (filled in only when
      ``do_embed`` is true)
    * ``font_spec_map``: item name -> set of font family names (only when
      ``do_embed`` is true)
    """

    def __init__(self, container, do_embed=False):
        """Collect statistics for ``container``; blocks until finished.

        Raises ``Exception`` when the event loop was exited with code 1,
        which is what the failure paths in this class use.
        """
        self.container = container
        self.log = self.logger = container.log
        self.do_embed = do_embed
        must_use_qt()
        self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css'))
        # Matches a leading run of punctuation; handed to
        # get_pseudo_element_font_usage() below
        self.first_letter_pat = regex.compile(r'^[\p{Ps}\p{Ps}\p{Pe}\p{Pi}\p{Pf}\p{Po}]+', regex.VERSION1 | regex.UNICODE)

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(self.log)
        self.view.setPage(self.page)
        self.page.setViewportSize(QSize(1200, 1600))

        # collect() runs (queued) each time the view finishes loading an item
        self.view.loadFinished.connect(self.collect,
                type=Qt.QueuedConnection)

        self.render_queue = list(container.spine_items)
        self.font_stats = {}
        self.font_usage_map = {}
        self.font_spec_map = {}
        self.font_rule_map = {}
        self.all_font_rules = {}

        # Start rendering once the event loop below is running
        QTimer.singleShot(0, self.render_book)

        if self.loop.exec_() == 1:
            raise Exception('Failed to gather statistics from book, see log for details')

    def log_exception(self, *args):
        """Log the current exception with the log's filter level set to DEBUG.

        The previous filter level is restored afterwards, even if logging
        itself raises.
        """
        orig = self.log.filter_level
        try:
            self.log.filter_level = self.log.DEBUG
            self.log.exception(*args)
        finally:
            self.log.filter_level = orig

    def render_book(self):
        """Render the next queued item, or exit the event loop when none remain."""
        try:
            if not self.render_queue:
                self.loop.exit()
            else:
                self.render_next()
        except:
            # Catch everything: the loop has to be exited, otherwise
            # __init__ stays blocked in self.loop.exec_()
            self.log_exception('Rendering failed')
            self.loop.exit(1)

    def render_next(self):
        """Pop the first queued item, remember it as current and load it into the view."""
        item = unicode(self.render_queue.pop(0))
        self.current_item = item
        load_html(item, self.view)

    def collect(self, ok):
        """Slot for ``loadFinished``: gather stats for the current item, then continue.

        ``ok`` is the load result reported by the view. Any failure exits
        the event loop with code 1.
        """
        if not ok:
            self.log.error('Failed to render document: %s'%self.container.relpath(self.current_item))
            self.loop.exit(1)
            return
        try:
            self.page.load_js()
            self.collect_font_stats()
        except:
            # Catch everything so that the event loop is always exited
            self.log_exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item))
            self.loop.exit(1)
            return

        self.render_book()

    def href_to_name(self, href, warn_name):
        """Convert a ``file://`` URL into a container name.

        Returns None (after logging a warning that mentions ``warn_name``)
        when ``href`` is not a ``file://`` URL or when the container has no
        entry with the resulting name.
        """
        if not href.startswith('file://'):
            self.log.warn('Non-local URI in', warn_name, ':', href, 'ignoring')
            return None
        src = href[len('file://'):]
        # On Windows, drop the slash in front of a drive letter ("/C:/...")
        if iswindows and len(src) > 2 and (src[0], src[2]) == ('/', ':'):
            src = src[1:]
        src = src.replace('/', os.sep)
        src = unquote(src)
        name = self.container.abspath_to_name(src)
        if not self.container.has_name(name):
            self.log.warn('Missing resource', href, 'in', warn_name,
                          'ignoring')
            return None
        return name

    def collect_font_stats(self):
        """Read @font-face rules and font usage from the loaded page and record them.

        The data comes from ``window.font_stats`` functions evaluated in
        the page; each result is read from ``self.page.bridge_value``
        right after the corresponding ``evaljs`` call. Raises
        ``Exception`` when a result does not have the expected type.
        """
        self.page.evaljs('window.font_stats.get_font_face_rules()')
        font_face_rules = self.page.bridge_value
        if not isinstance(font_face_rules, list):
            raise Exception('Unknown error occurred while reading font-face rules')

        # Weed out invalid font-face rules
        rules = []
        import tinycss
        parser = tinycss.make_full_parser()
        for rule in font_face_rules:
            ff = rule.get('font-family', None)
            if not ff:
                continue
            style = self.parser.parseStyle('font-family:%s'%ff, validate=False)
            ff = [x.value for x in
                  style.getProperty('font-family').propertyValue]
            if not ff or ff[0] == 'inherit':
                continue
            rule['font-family'] = frozenset(icu_lower(f) for f in ff)
            src = rule.get('src', None)
            if not src:
                continue
            try:
                tokens = parser.parse_stylesheet('@font-face { src: %s }' % src).rules[0].declarations[0].value
            except Exception:
                # NOTE(review): the message says @font-family although the
                # text being parsed is an @font-face rule
                self.log.warn('Failed to parse @font-family src: %s' % src)
                continue
            # Use the first URI token that maps to a file in the container
            for token in tokens:
                if token.type == 'URI':
                    uv = token.value
                    if uv:
                        sn = self.href_to_name(uv, '@font-face rule')
                        if sn is not None:
                            rule['src'] = sn
                            break
            else:
                # Reached only when the loop above never hit ``break``
                self.log.warn('The @font-face rule refers to a font file that does not exist in the book: %s' % src)
                continue
            normalize_font_properties(rule)
            rule['width'] = widths[rule['font-stretch']]
            rule['weight'] = int(rule['font-weight'])
            rules.append(rule)

        if not rules and not self.do_embed:
            return

        self.font_rule_map[self.container.abspath_to_name(self.current_item)] = rules
        for rule in rules:
            self.all_font_rules[rule['src']] = rule

        # Make sure every font file has an entry, even if nothing uses it
        for rule in rules:
            if rule['src'] not in self.font_stats:
                self.font_stats[rule['src']] = set()

        self.page.evaljs('window.font_stats.get_font_usage()')
        font_usage = self.page.bridge_value
        if not isinstance(font_usage, list):
            raise Exception('Unknown error occurred while reading font usage')
        self.page.evaljs('window.font_stats.get_pseudo_element_font_usage()')
        pseudo_element_font_usage = self.page.bridge_value
        if not isinstance(pseudo_element_font_usage, list):
            raise Exception('Unknown error occurred while reading pseudo element font usage')
        font_usage += get_pseudo_element_font_usage(pseudo_element_font_usage, self.first_letter_pat, self.parser)
        # Characters that are never counted as used text
        exclude = {'\n', '\r', '\t'}
        self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict)
        # Family names that are never recorded for embedding
        bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
        for font in font_usage:
            # Set of individual characters from every text chunk of this entry
            text = set()
            for t in font['text']:
                text |= frozenset(t)
            text.difference_update(exclude)
            if not text:
                continue
            normalize_font_properties(font)
            for rule in get_matching_rules(rules, font):
                self.font_stats[rule['src']] |= text
            if self.do_embed:
                ff = [icu_lower(x) for x in font.get('font-family', [])]
                if ff and ff[0] not in bad_fonts:
                    keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'}
                    # A face is identified by weight, style, stretch and the
                    # lower-cased first family name
                    key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in keys))
                    val = fu[key]
                    if not val:
                        # First time this face is seen: store its properties
                        # (first family name as given) and an empty text set
                        val.update({k:(font[k][0] if k == 'font-family' else font[k]) for k in keys})
                        val['text'] = set()
                    val['text'] |= text
        # Replace the defaultdict stored above with a plain dict
        self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu)

        if self.do_embed:
            self.page.evaljs('window.font_stats.get_font_families()')
            font_families = self.page.bridge_value
            if not isinstance(font_families, dict):
                raise Exception('Unknown error occurred while reading font families')
            self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set()
            # Families used by pseudo elements are added to the same mapping
            for font_dict, text, pseudo in pseudo_element_font_usage:
                font_families[font_dict['font-family']] = True
            for raw in font_families.iterkeys():
                for x in parse_font_families(self.parser, raw):
                    if x.lower() not in bad_fonts:
                        fs.add(x)
示例#8
0
class StatsCollector(object):
    """Load every spine item of a container into a QWebView and gather font data.

    All the work is started from the constructor, which runs a local
    QEventLoop until every item in ``container.spine_items`` has been
    processed or a failure occurred. Results are left on the instance:

    * ``font_stats``: font file name -> set of characters using that font
    * ``font_rule_map``: item name -> list of accepted @font-face rules
    * ``all_font_rules``: font file name -> its @font-face rule
    * ``font_usage_map``: item name -> per-face usage (filled in only when
      ``do_embed`` is true)
    * ``font_spec_map``: item name -> set of font family names (only when
      ``do_embed`` is true)
    """

    def __init__(self, container, do_embed=False):
        """Collect statistics for ``container``; blocks until finished.

        Raises ``Exception`` when the event loop was exited with code 1,
        which is what the failure paths in this class use.
        """
        self.container = container
        self.log = self.logger = container.log
        self.do_embed = do_embed
        must_use_qt()
        self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css'))
        # Matches a leading run of punctuation; handed to
        # get_pseudo_element_font_usage() below
        self.first_letter_pat = regex.compile(r'^[\p{Ps}\p{Ps}\p{Pe}\p{Pi}\p{Pf}\p{Po}]+', regex.VERSION1 | regex.UNICODE)
        # Matches a single letter or number; used for text-transform: capitalize
        self.capitalize_pat = regex.compile(r'[\p{L}\p{N}]', regex.VERSION1 | regex.UNICODE)

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(self.log)
        self.view.setPage(self.page)
        self.page.setViewportSize(QSize(1200, 1600))

        # collect() runs (queued) each time the view finishes loading an item
        self.view.loadFinished.connect(self.collect,
                type=Qt.QueuedConnection)

        self.render_queue = list(container.spine_items)
        self.font_stats = {}
        self.font_usage_map = {}
        self.font_spec_map = {}
        self.font_rule_map = {}
        self.all_font_rules = {}

        # Start rendering once the event loop below is running
        QTimer.singleShot(0, self.render_book)

        if self.loop.exec_() == 1:
            raise Exception('Failed to gather statistics from book, see log for details')

    def log_exception(self, *args):
        """Log the current exception with the log's filter level set to DEBUG.

        The previous filter level is restored afterwards, even if logging
        itself raises.
        """
        orig = self.log.filter_level
        try:
            self.log.filter_level = self.log.DEBUG
            self.log.exception(*args)
        finally:
            self.log.filter_level = orig

    def render_book(self):
        """Render the next queued item, or exit the event loop when none remain."""
        try:
            if not self.render_queue:
                self.loop.exit()
            else:
                self.render_next()
        except:
            # Catch everything: the loop has to be exited, otherwise
            # __init__ stays blocked in self.loop.exec_()
            self.log_exception('Rendering failed')
            self.loop.exit(1)

    def render_next(self):
        """Pop the first queued item, remember it as current and load it into the view."""
        item = unicode(self.render_queue.pop(0))
        self.current_item = item
        load_html(item, self.view)

    def collect(self, ok):
        """Slot for ``loadFinished``: gather stats for the current item, then continue.

        ``ok`` is the load result reported by the view. Any failure exits
        the event loop with code 1.
        """
        if not ok:
            self.log.error('Failed to render document: %s'%self.container.relpath(self.current_item))
            self.loop.exit(1)
            return
        try:
            self.page.load_js()
            self.collect_font_stats()
        except:
            # Catch everything so that the event loop is always exited
            self.log_exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item))
            self.loop.exit(1)
            return

        self.render_book()

    def href_to_name(self, href, warn_name):
        """Convert a ``file://`` URL into a container name.

        Returns None (after logging a warning that mentions ``warn_name``)
        when ``href`` is not a ``file://`` URL or when the container has no
        entry with the resulting name.
        """
        if not href.startswith('file://'):
            self.log.warn('Non-local URI in', warn_name, ':', href, 'ignoring')
            return None
        src = href[len('file://'):]
        # On Windows, drop the slash in front of a drive letter ("/C:/...")
        if iswindows and len(src) > 2 and (src[0], src[2]) == ('/', ':'):
            src = src[1:]
        src = src.replace('/', os.sep)
        src = unquote(src)
        name = self.container.abspath_to_name(src)
        if not self.container.has_name(name):
            self.log.warn('Missing resource', href, 'in', warn_name,
                          'ignoring')
            return None
        return name

    def collect_font_stats(self):
        """Read @font-face rules and font usage from the loaded page and record them.

        The data comes from ``window.font_stats`` functions evaluated in
        the page; each result is read from ``self.page.bridge_value``
        right after the corresponding ``evaljs`` call. Raises
        ``Exception`` when a result does not have the expected type.
        """
        self.page.evaljs('window.font_stats.get_font_face_rules()')
        font_face_rules = self.page.bridge_value
        if not isinstance(font_face_rules, list):
            raise Exception('Unknown error occurred while reading font-face rules')

        # Weed out invalid font-face rules
        rules = []
        import tinycss
        parser = tinycss.make_full_parser()
        for rule in font_face_rules:
            ff = rule.get('font-family', None)
            if not ff:
                continue
            style = self.parser.parseStyle('font-family:%s'%ff, validate=False)
            ff = [x.value for x in
                  style.getProperty('font-family').propertyValue]
            if not ff or ff[0] == 'inherit':
                continue
            rule['font-family'] = frozenset(icu_lower(f) for f in ff)
            src = rule.get('src', None)
            if not src:
                continue
            try:
                tokens = parser.parse_stylesheet('@font-face { src: %s }' % src).rules[0].declarations[0].value
            except Exception:
                # NOTE(review): the message says @font-family although the
                # text being parsed is an @font-face rule
                self.log.warn('Failed to parse @font-family src: %s' % src)
                continue
            # Use the first URI token that maps to a file in the container
            for token in tokens:
                if token.type == 'URI':
                    uv = token.value
                    if uv:
                        sn = self.href_to_name(uv, '@font-face rule')
                        if sn is not None:
                            rule['src'] = sn
                            break
            else:
                # Reached only when the loop above never hit ``break``
                self.log.warn('The @font-face rule refers to a font file that does not exist in the book: %s' % src)
                continue
            normalize_font_properties(rule)
            rule['width'] = widths[rule['font-stretch']]
            rule['weight'] = int(rule['font-weight'])
            rules.append(rule)

        if not rules and not self.do_embed:
            return

        self.font_rule_map[self.container.abspath_to_name(self.current_item)] = rules
        for rule in rules:
            self.all_font_rules[rule['src']] = rule

        # Make sure every font file has an entry, even if nothing uses it
        for rule in rules:
            if rule['src'] not in self.font_stats:
                self.font_stats[rule['src']] = set()

        self.page.evaljs('window.font_stats.get_font_usage()')
        font_usage = self.page.bridge_value
        if not isinstance(font_usage, list):
            raise Exception('Unknown error occurred while reading font usage')
        self.page.evaljs('window.font_stats.get_pseudo_element_font_usage()')
        pseudo_element_font_usage = self.page.bridge_value
        if not isinstance(pseudo_element_font_usage, list):
            raise Exception('Unknown error occurred while reading pseudo element font usage')
        font_usage += get_pseudo_element_font_usage(pseudo_element_font_usage, self.first_letter_pat, self.parser)
        # Characters that are never counted as used text
        exclude = {'\n', '\r', '\t'}
        self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict)
        # Family names that are never recorded for embedding
        bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
        for font in font_usage:
            # Set of individual characters from every text chunk of this entry
            text = set()
            for t in font['text']:
                # Account for the characters text-transform makes the
                # renderer display instead of (or next to) the source text
                tt = (font['text-transform'] or '').lower()
                if tt != 'none':
                    if tt == 'uppercase':
                        t = icu_upper(t)
                    elif tt == 'lowercase':
                        t = icu_lower(t)
                    elif tt == 'capitalize':
                        # Only the first letter/number found in the chunk is
                        # upper-cased and appended
                        m = self.capitalize_pat.search(t)
                        if m is not None:
                            t += icu_upper(m.group())
                fv = (font['font-variant'] or '').lower()
                if fv in {'smallcaps', 'small-caps', 'all-small-caps', 'petite-caps', 'all-petite-caps', 'unicase'}:
                    t += icu_upper(t)  # for renderers that try to fake small-caps by using small normal caps
                text |= frozenset(t)
            text.difference_update(exclude)
            if not text:
                continue
            normalize_font_properties(font)
            for rule in get_matching_rules(rules, font):
                self.font_stats[rule['src']] |= text
            if self.do_embed:
                ff = [icu_lower(x) for x in font.get('font-family', [])]
                if ff and ff[0] not in bad_fonts:
                    keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'}
                    # A face is identified by weight, style, stretch and the
                    # lower-cased first family name
                    key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in keys))
                    val = fu[key]
                    if not val:
                        # First time this face is seen: store its properties
                        # (first family name as given) and an empty text set
                        val.update({k:(font[k][0] if k == 'font-family' else font[k]) for k in keys})
                        val['text'] = set()
                    val['text'] |= text
        # Replace the defaultdict stored above with a plain dict
        self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu)

        if self.do_embed:
            self.page.evaljs('window.font_stats.get_font_families()')
            font_families = self.page.bridge_value
            if not isinstance(font_families, dict):
                raise Exception('Unknown error occurred while reading font families')
            self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set()
            # Families used by pseudo elements are added to the same mapping
            for font_dict, text, pseudo in pseudo_element_font_usage:
                font_families[font_dict['font-family']] = True
            for raw in font_families.iterkeys():
                for x in parse_font_families(self.parser, raw):
                    if x.lower() not in bad_fonts:
                        fs.add(x)
示例#9
0
文件: stats.py 项目: piewsook/calibre
class StatsCollector(object):
    """Load every spine item of a container into a QWebView and gather font data.

    All the work is started from the constructor, which runs a local
    QEventLoop until every item in ``container.spine_items`` has been
    processed or a failure occurred. Results are left on the instance:

    * ``font_stats``: font file name -> set of characters using that font
    * ``font_rule_map``: item name -> list of accepted @font-face rules
    * ``all_font_rules``: font file name -> its @font-face rule
    * ``font_usage_map``: item name -> per-face usage (filled in only when
      ``do_embed`` is true)
    * ``font_spec_map``: item name -> set of font family names (only when
      ``do_embed`` is true)
    """

    def __init__(self, container, do_embed=False):
        """Collect statistics for ``container``; blocks until finished.

        Raises ``Exception`` when the event loop was exited with code 1,
        which is what the failure paths in this class use.
        """
        self.container = container
        self.log = self.logger = container.log
        self.do_embed = do_embed
        must_use_qt()
        self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css'))

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(self.log)
        self.view.setPage(self.page)
        self.page.setViewportSize(QSize(1200, 1600))

        # collect() runs (queued) each time the view finishes loading an item
        self.view.loadFinished.connect(self.collect,
                type=Qt.QueuedConnection)

        self.render_queue = list(container.spine_items)
        self.font_stats = {}
        self.font_usage_map = {}
        self.font_spec_map = {}
        self.font_rule_map = {}
        self.all_font_rules = {}

        # Start rendering once the event loop below is running
        QTimer.singleShot(0, self.render_book)

        if self.loop.exec_() == 1:
            raise Exception('Failed to gather statistics from book, see log for details')

    def render_book(self):
        """Render the next queued item, or exit the event loop when none remain."""
        try:
            if not self.render_queue:
                self.loop.exit()
            else:
                self.render_next()
        except:
            # Catch everything: the loop has to be exited, otherwise
            # __init__ stays blocked in self.loop.exec_()
            self.logger.exception('Rendering failed')
            self.loop.exit(1)

    def render_next(self):
        """Pop the first queued item, remember it as current and load it into the view."""
        item = unicode(self.render_queue.pop(0))
        self.current_item = item
        load_html(item, self.view)

    def collect(self, ok):
        """Slot for ``loadFinished``: gather stats for the current item, then continue.

        ``ok`` is the load result reported by the view. Any failure exits
        the event loop with code 1.
        """
        if not ok:
            self.log.error('Failed to render document: %s'%self.container.relpath(self.current_item))
            self.loop.exit(1)
            return
        try:
            self.page.load_js()
            self.collect_font_stats()
        except:
            # Catch everything so that the event loop is always exited
            self.log.exception('Failed to collect font stats from: %s'%self.container.relpath(self.current_item))
            self.loop.exit(1)
            return

        self.render_book()

    def href_to_name(self, href, warn_name):
        """Convert a ``file://`` URL into a container name.

        Returns None (after logging a warning that mentions ``warn_name``)
        when ``href`` is not a ``file://`` URL or when the container has no
        entry with the resulting name.
        """
        if not href.startswith('file://'):
            self.log.warn('Non-local URI in', warn_name, ':', href, 'ignoring')
            return None
        src = href[len('file://'):]
        # On Windows, drop the slash in front of a drive letter ("/C:/...")
        if iswindows and len(src) > 2 and (src[0], src[2]) == ('/', ':'):
            src = src[1:]
        src = src.replace('/', os.sep)
        src = unquote(src)
        name = self.container.abspath_to_name(src)
        if not self.container.has_name(name):
            self.log.warn('Missing resource', href, 'in', warn_name,
                          'ignoring')
            return None
        return name

    @staticmethod
    def _first_uri(prop):
        """Return the first non-empty ``uri`` among the values of ``prop``.

        ``prop`` is a parsed property (or None when parsing produced no
        property). Values without a ``uri`` attribute, for example
        ``local(...)`` entries, are skipped. Returns None when no value
        carries a URI.
        """
        if prop is None:
            return None
        for value in prop.propertyValue:
            uri = getattr(value, 'uri', None)
            if uri:
                return uri
        return None

    def collect_font_stats(self):
        """Read @font-face rules and font usage from the loaded page and record them.

        The data comes from ``window.font_stats`` functions evaluated in
        the page; each result is read from ``self.page.bridge_value``
        right after the corresponding ``evaljs`` call. Raises
        ``Exception`` when a result does not have the expected type.
        """
        self.page.evaljs('window.font_stats.get_font_face_rules()')
        font_face_rules = self.page.bridge_value
        if not isinstance(font_face_rules, list):
            raise Exception('Unknown error occurred while reading font-face rules')

        # Weed out invalid font-face rules
        rules = []
        for rule in font_face_rules:
            ff = rule.get('font-family', None)
            if not ff:
                continue
            style = self.parser.parseStyle('font-family:%s'%ff, validate=False)
            ff = [x.value for x in
                  style.getProperty('font-family').propertyValue]
            if not ff or ff[0] == 'inherit':
                continue
            rule['font-family'] = frozenset(icu_lower(f) for f in ff)
            src = rule.get('src', None)
            if not src:
                continue
            if src.startswith('url(') and src.endswith(')') and src[4] not in {'"', "'"}:
                # Quote the url otherwise cssutils fails to parse it if it has
                # ' or " in it
                src = "url('" + src[4:-1].replace("'", "\\'") + "')"
            style = self.parser.parseStyle('background-image:%s'%src, validate=False)
            # A src that does not parse, or whose first value is not a url()
            # (e.g. local(...)), must only invalidate this rule, not abort
            # statistics gathering for the whole book
            uri = self._first_uri(style.getProperty('background-image'))
            if not uri:
                self.log.warn('Failed to parse @font-face src: %s' % src)
                continue
            name = self.href_to_name(uri, '@font-face rule')
            if name is None:
                continue
            rule['src'] = name
            normalize_font_properties(rule)
            rule['width'] = widths[rule['font-stretch']]
            rule['weight'] = int(rule['font-weight'])
            rules.append(rule)

        if not rules and not self.do_embed:
            return

        self.font_rule_map[self.container.abspath_to_name(self.current_item)] = rules
        for rule in rules:
            self.all_font_rules[rule['src']] = rule

        # Make sure every font file has an entry, even if nothing uses it
        for rule in rules:
            if rule['src'] not in self.font_stats:
                self.font_stats[rule['src']] = set()

        self.page.evaljs('window.font_stats.get_font_usage()')
        font_usage = self.page.bridge_value
        if not isinstance(font_usage, list):
            raise Exception('Unknown error occurred while reading font usage')
        # Characters that are never counted as used text
        exclude = {'\n', '\r', '\t'}
        self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict)
        # Family names that are never recorded for embedding
        bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
        for font in font_usage:
            # Set of individual characters from every text chunk of this entry
            text = set()
            for t in font['text']:
                text |= frozenset(t)
            text.difference_update(exclude)
            if not text:
                continue
            normalize_font_properties(font)
            for rule in get_matching_rules(rules, font):
                self.font_stats[rule['src']] |= text
            if self.do_embed:
                ff = [icu_lower(x) for x in font.get('font-family', [])]
                if ff and ff[0] not in bad_fonts:
                    keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'}
                    # A face is identified by weight, style, stretch and the
                    # lower-cased first family name
                    key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in keys))
                    val = fu[key]
                    if not val:
                        # First time this face is seen: store its properties
                        # (first family name as given) and an empty text set
                        val.update({k:(font[k][0] if k == 'font-family' else font[k]) for k in keys})
                        val['text'] = set()
                    val['text'] |= text
        # Replace the defaultdict stored above with a plain dict
        self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu)

        if self.do_embed:
            self.page.evaljs('window.font_stats.get_font_families()')
            font_families = self.page.bridge_value
            if not isinstance(font_families, dict):
                raise Exception('Unknown error occurred while reading font families')
            self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set()
            for raw in font_families.iterkeys():
                style = self.parser.parseStyle('font-family:' + raw, validate=False).getProperty('font-family')
                for x in style.propertyValue:
                    x = x.value
                    if x and x.lower() not in bad_fonts:
                        fs.add(x)
示例#10
0
class StatsCollector(object):
    """Load every spine item of a container into a QWebView and gather font data.

    All the work is started from the constructor, which runs a local
    QEventLoop until every item in ``container.spine_items`` has been
    processed or a failure occurred. Results are left on the instance:

    * ``font_stats``: font file name -> set of characters using that font
    * ``font_rule_map``: item name -> list of accepted @font-face rules
    * ``all_font_rules``: font file name -> its @font-face rule
    * ``font_usage_map``: item name -> per-face usage (filled in only when
      ``do_embed`` is true)
    * ``font_spec_map``: item name -> set of font family names (only when
      ``do_embed`` is true)
    """

    def __init__(self, container, do_embed=False):
        """Collect statistics for ``container``; blocks until finished.

        Raises ``Exception`` when the event loop was exited with code 1,
        which is what the failure paths in this class use.
        """
        self.container = container
        self.log = self.logger = container.log
        self.do_embed = do_embed
        must_use_qt()
        self.parser = CSSParser(loglevel=logging.CRITICAL,
                                log=logging.getLogger('calibre.css'))
        # Matches a leading run of punctuation; handed to
        # get_pseudo_element_font_usage() below
        self.first_letter_pat = regex.compile(
            r'^[\p{Ps}\p{Ps}\p{Pe}\p{Pi}\p{Pf}\p{Po}]+',
            regex.VERSION1 | regex.UNICODE)

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(self.log)
        self.view.setPage(self.page)
        self.page.setViewportSize(QSize(1200, 1600))

        # collect() runs (queued) each time the view finishes loading an item
        self.view.loadFinished.connect(self.collect, type=Qt.QueuedConnection)

        self.render_queue = list(container.spine_items)
        self.font_stats = {}
        self.font_usage_map = {}
        self.font_spec_map = {}
        self.font_rule_map = {}
        self.all_font_rules = {}

        # Start rendering once the event loop below is running
        QTimer.singleShot(0, self.render_book)

        if self.loop.exec_() == 1:
            raise Exception(
                'Failed to gather statistics from book, see log for details')

    def log_exception(self, *args):
        """Log the current exception with the log's filter level set to DEBUG.

        The previous filter level is restored afterwards, even if logging
        itself raises.
        """
        orig = self.log.filter_level
        try:
            self.log.filter_level = self.log.DEBUG
            self.log.exception(*args)
        finally:
            self.log.filter_level = orig

    def render_book(self):
        """Render the next queued item, or exit the event loop when none remain."""
        try:
            if not self.render_queue:
                self.loop.exit()
            else:
                self.render_next()
        except:
            # Catch everything: the loop has to be exited, otherwise
            # __init__ stays blocked in self.loop.exec_()
            self.log_exception('Rendering failed')
            self.loop.exit(1)

    def render_next(self):
        """Pop the first queued item, remember it as current and load it into the view."""
        item = unicode(self.render_queue.pop(0))
        self.current_item = item
        load_html(item, self.view)

    def collect(self, ok):
        """Slot for ``loadFinished``: gather stats for the current item, then continue.

        ``ok`` is the load result reported by the view. Any failure exits
        the event loop with code 1.
        """
        if not ok:
            self.log.error('Failed to render document: %s' %
                           self.container.relpath(self.current_item))
            self.loop.exit(1)
            return
        try:
            self.page.load_js()
            self.collect_font_stats()
        except:
            # Catch everything so that the event loop is always exited
            self.log_exception('Failed to collect font stats from: %s' %
                               self.container.relpath(self.current_item))
            self.loop.exit(1)
            return

        self.render_book()

    def href_to_name(self, href, warn_name):
        """Convert a ``file://`` URL into a container name.

        Returns None (after logging a warning that mentions ``warn_name``)
        when ``href`` is not a ``file://`` URL or when the container has no
        entry with the resulting name.
        """
        if not href.startswith('file://'):
            self.log.warn('Non-local URI in', warn_name, ':', href, 'ignoring')
            return None
        src = href[len('file://'):]
        # On Windows, drop the slash in front of a drive letter ("/C:/...")
        if iswindows and len(src) > 2 and (src[0], src[2]) == ('/', ':'):
            src = src[1:]
        src = src.replace('/', os.sep)
        src = unquote(src)
        name = self.container.abspath_to_name(src)
        if not self.container.has_name(name):
            self.log.warn('Missing resource', href, 'in', warn_name,
                          'ignoring')
            return None
        return name

    @staticmethod
    def _first_uri(prop):
        """Return the first non-empty ``uri`` among the values of ``prop``.

        ``prop`` is a parsed property (or None when parsing produced no
        property). Values without a ``uri`` attribute, for example
        ``local(...)`` entries, are skipped. Returns None when no value
        carries a URI.
        """
        if prop is None:
            return None
        for value in prop.propertyValue:
            uri = getattr(value, 'uri', None)
            if uri:
                return uri
        return None

    def collect_font_stats(self):
        """Read @font-face rules and font usage from the loaded page and record them.

        The data comes from ``window.font_stats`` functions evaluated in
        the page; each result is read from ``self.page.bridge_value``
        right after the corresponding ``evaljs`` call. Raises
        ``Exception`` when a result does not have the expected type.
        """
        self.page.evaljs('window.font_stats.get_font_face_rules()')
        font_face_rules = self.page.bridge_value
        if not isinstance(font_face_rules, list):
            raise Exception(
                'Unknown error occurred while reading font-face rules')

        # Weed out invalid font-face rules
        rules = []
        for rule in font_face_rules:
            ff = rule.get('font-family', None)
            if not ff:
                continue
            style = self.parser.parseStyle('font-family:%s' % ff,
                                           validate=False)
            ff = [
                x.value for x in style.getProperty('font-family').propertyValue
            ]
            if not ff or ff[0] == 'inherit':
                continue
            rule['font-family'] = frozenset(icu_lower(f) for f in ff)
            src = rule.get('src', None)
            if not src:
                continue
            if src.startswith('url(') and src.endswith(')') and src[4] not in {
                    '"', "'"
            }:
                # Quote the url otherwise cssutils fails to parse it if it has
                # ' or " in it
                src = "url('" + src[4:-1].replace("'", "\\'") + "')"
            style = self.parser.parseStyle('background-image:%s' % src,
                                           validate=False)
            # A src that does not parse, or whose first value is not a url()
            # (e.g. local(...)), must only invalidate this rule, not abort
            # statistics gathering for the whole book
            uri = self._first_uri(style.getProperty('background-image'))
            if not uri:
                self.log.warn('Failed to parse @font-face src: %s' % src)
                continue
            name = self.href_to_name(uri, '@font-face rule')
            if name is None:
                continue
            rule['src'] = name
            normalize_font_properties(rule)
            rule['width'] = widths[rule['font-stretch']]
            rule['weight'] = int(rule['font-weight'])
            rules.append(rule)

        if not rules and not self.do_embed:
            return

        self.font_rule_map[self.container.abspath_to_name(
            self.current_item)] = rules
        for rule in rules:
            self.all_font_rules[rule['src']] = rule

        # Make sure every font file has an entry, even if nothing uses it
        for rule in rules:
            if rule['src'] not in self.font_stats:
                self.font_stats[rule['src']] = set()

        self.page.evaljs('window.font_stats.get_font_usage()')
        font_usage = self.page.bridge_value
        if not isinstance(font_usage, list):
            raise Exception('Unknown error occurred while reading font usage')
        self.page.evaljs('window.font_stats.get_pseudo_element_font_usage()')
        pseudo_element_font_usage = self.page.bridge_value
        if not isinstance(pseudo_element_font_usage, list):
            raise Exception(
                'Unknown error occurred while reading pseudo element font usage'
            )
        font_usage += get_pseudo_element_font_usage(pseudo_element_font_usage,
                                                    self.first_letter_pat,
                                                    self.parser)
        # Characters that are never counted as used text
        exclude = {'\n', '\r', '\t'}
        self.font_usage_map[self.container.abspath_to_name(
            self.current_item)] = fu = defaultdict(dict)
        # Family names that are never recorded for embedding
        bad_fonts = {
            'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy',
            'sansserif', 'inherit'
        }
        for font in font_usage:
            # Set of individual characters from every text chunk of this entry
            text = set()
            for t in font['text']:
                text |= frozenset(t)
            text.difference_update(exclude)
            if not text:
                continue
            normalize_font_properties(font)
            for rule in get_matching_rules(rules, font):
                self.font_stats[rule['src']] |= text
            if self.do_embed:
                ff = [icu_lower(x) for x in font.get('font-family', [])]
                if ff and ff[0] not in bad_fonts:
                    keys = {
                        'font-weight', 'font-style', 'font-stretch',
                        'font-family'
                    }
                    # A face is identified by weight, style, stretch and the
                    # lower-cased first family name
                    key = frozenset(((k, ff[0] if k == 'font-family' else v)
                                     for k, v in font.iteritems()
                                     if k in keys))
                    val = fu[key]
                    if not val:
                        # First time this face is seen: store its properties
                        # (first family name as given) and an empty text set
                        val.update({
                            k: (font[k][0] if k == 'font-family' else font[k])
                            for k in keys
                        })
                        val['text'] = set()
                    val['text'] |= text
        # Replace the defaultdict stored above with a plain dict
        self.font_usage_map[self.container.abspath_to_name(
            self.current_item)] = dict(fu)

        if self.do_embed:
            self.page.evaljs('window.font_stats.get_font_families()')
            font_families = self.page.bridge_value
            if not isinstance(font_families, dict):
                raise Exception(
                    'Unknown error occurred while reading font families')
            self.font_spec_map[self.container.abspath_to_name(
                self.current_item)] = fs = set()
            # Families used by pseudo elements are added to the same mapping
            for font_dict, text, pseudo in pseudo_element_font_usage:
                font_families[font_dict['font-family']] = True
            for raw in font_families.iterkeys():
                for x in parse_font_families(self.parser, raw):
                    if x.lower() not in bad_fonts:
                        fs.add(x)
示例#11
0
class StatsCollector(object):
    def __init__(self, container, do_embed=False):
        """Render every spine item of *container* and gather font statistics.

        Construction blocks: ``render_book`` is scheduled on a private Qt
        event loop and the loop is run until one of the rendering callbacks
        exits it.

        container -- book container; ``container.log`` and
                     ``container.spine_items`` are read here.
        do_embed  -- stored as ``self.do_embed``; consulted by
                     ``collect_font_stats``.

        Raises Exception when the event loop finishes with exit code 1.
        """
        self.container = container
        self.do_embed = do_embed
        # The same logger object is reachable under both attribute names.
        self.log = container.log
        self.logger = self.log

        # Called before any Qt object is created below.
        must_use_qt()
        css_log = logging.getLogger("calibre.css")
        self.parser = CSSParser(loglevel=logging.CRITICAL, log=css_log)

        # Event loop plus the web view/page used to render each document.
        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(self.log)
        self.view.setPage(self.page)
        viewport = QSize(1200, 1600)
        self.page.setViewportSize(viewport)

        # collect() is invoked through a queued connection once a load ends.
        self.view.loadFinished.connect(self.collect, type=Qt.QueuedConnection)

        # Work queue and the result containers filled while collecting.
        self.render_queue = [item for item in container.spine_items]
        self.font_stats = {}
        self.font_usage_map = {}
        self.font_spec_map = {}
        self.font_rule_map = {}
        self.all_font_rules = {}

        # Start rendering as soon as the event loop is running.
        QTimer.singleShot(0, self.render_book)

        exit_code = self.loop.exec_()
        if exit_code == 1:
            raise Exception("Failed to gather statistics from book, see log for details")

    def render_book(self):
        try:
            if not self.render_queue:
                self.loop.exit()
            else:
                self.render_next()
        except:
            self.logger.exception("Rendering failed")
            self.loop.exit(1)

    def render_next(self):
        """Take the first queued item and hand it to ``load_html``.

        The item, converted with ``unicode()``, is remembered as
        ``self.current_item`` so later callbacks know which document the
        view is showing.
        """
        queued = self.render_queue.pop(0)
        current = unicode(queued)
        self.current_item = current
        load_html(current, self.view)

    def collect(self, ok):
        if not ok:
            self.log.error("Failed to render document: %s" % self.container.relpath(self.current_item))
            self.loop.exit(1)
            return
        try:
            self.page.load_js()
            self.collect_font_stats()
        except:
            self.log.exception("Failed to collect font stats from: %s" % self.container.relpath(self.current_item))
            self.loop.exit(1)
            return

        self.render_book()

    def href_to_name(self, href, warn_name):
        if not href.startswith("file://"):
            self.log.warn("Non-local URI in", warn_name, ":", href, "ignoring")
            return None
        src = href[len("file://") :]
        if iswindows and len(src) > 2 and (src[0], src[2]) == ("/", ":"):
            src = src[1:]
        src = src.replace("/", os.sep)
        src = unquote(src)
        name = self.container.abspath_to_name(src)
        if not self.container.has_name(name):
            self.log.warn("Missing resource", href, "in", warn_name, "ignoring")
            return None
        return name

    def collect_font_stats(self):
        """Gather @font-face rules and font usage for ``self.current_item``.

        Data is requested from the rendered page with ``self.page.evaljs`` and
        read back from ``self.page.bridge_value``. With ``name`` being
        ``container.abspath_to_name(self.current_item)``, this method fills:

        * ``self.font_rule_map[name]`` -- list of usable @font-face rules.
        * ``self.all_font_rules[src]`` -- each usable rule keyed by the
          container name of its font file.
        * ``self.font_stats[src]`` -- set accumulating the text rendered with
          rules matching each usage entry.
        * ``self.font_usage_map[name]`` -- dict of per-font usage records;
          entries are only added when ``self.do_embed`` is true.
        * ``self.font_spec_map[name]`` -- set of non-generic family names;
          only written when ``self.do_embed`` is true.

        Returns early, recording nothing, when no usable rule was found and
        ``self.do_embed`` is false.

        Raises Exception when a value read back from the page does not have
        the expected type (list for rules and usage, dict for families).
        """
        self.page.evaljs("window.font_stats.get_font_face_rules()")
        font_face_rules = self.page.bridge_value
        if not isinstance(font_face_rules, list):
            raise Exception("Unknown error occurred while reading font-face rules")

        # Weed out invalid font-face rules. Note that rule dicts are modified
        # in place, including rules that end up being skipped further down.
        rules = []
        for rule in font_face_rules:
            ff = rule.get("font-family", None)
            if not ff:
                continue
            # Run the raw value through the CSS parser to split it into the
            # individual family names.
            # NOTE(review): getProperty() presumably returns None if the parser
            # drops the declaration, which would raise here -- confirm.
            style = self.parser.parseStyle("font-family:%s" % ff, validate=False)
            ff = [x.value for x in style.getProperty("font-family").propertyValue]
            if not ff or ff[0] == "inherit":
                continue
            rule["font-family"] = frozenset(icu_lower(f) for f in ff)
            src = rule.get("src", None)
            if not src:
                continue
            # The src value is wrapped in a background-image declaration so the
            # parser exposes it as a value with a .uri; only the first value
            # is used.
            style = self.parser.parseStyle("background-image:%s" % src, validate=False)
            src = style.getProperty("background-image").propertyValue[0].uri
            name = self.href_to_name(src, "@font-face rule")
            if name is None:
                continue
            # From here on rule["src"] is a container name, not a URL.
            rule["src"] = name
            normalize_font_properties(rule)
            # Numeric forms of stretch and weight, stored alongside the
            # original properties.
            rule["width"] = widths[rule["font-stretch"]]
            rule["weight"] = int(rule["font-weight"])
            rules.append(rule)

        # Nothing to record unless embedding information was requested.
        if not rules and not self.do_embed:
            return

        self.font_rule_map[self.container.abspath_to_name(self.current_item)] = rules
        for rule in rules:
            self.all_font_rules[rule["src"]] = rule

        # Make sure every font file has a stats set, keeping any set that an
        # earlier document already created.
        for rule in rules:
            if rule["src"] not in self.font_stats:
                self.font_stats[rule["src"]] = set()

        self.page.evaljs("window.font_stats.get_font_usage()")
        font_usage = self.page.bridge_value
        if not isinstance(font_usage, list):
            raise Exception("Unknown error occurred while reading font usage")
        # Line breaks and tabs are not counted as used text.
        exclude = {"\n", "\r", "\t"}
        # fu is a defaultdict so the first lookup of a key creates its record.
        self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict)
        # Names that are never recorded as a concrete family below.
        bad_fonts = {"serif", "sans-serif", "monospace", "cursive", "fantasy", "sansserif", "inherit"}
        for font in font_usage:
            # Union of the elements of every entry in font["text"]
            # (individual characters, assuming the entries are strings).
            text = set()
            for t in font["text"]:
                text |= frozenset(t)
            text.difference_update(exclude)
            if not text:
                continue
            normalize_font_properties(font)
            for rule in get_matching_rules(rules, font):
                self.font_stats[rule["src"]] |= text
            if self.do_embed:
                ff = [icu_lower(x) for x in font.get("font-family", [])]
                if ff and ff[0] not in bad_fonts:
                    keys = {"font-weight", "font-style", "font-stretch", "font-family"}
                    # The record key uses the lower-cased first family plus the
                    # other three properties as found in the usage entry.
                    key = frozenset(((k, ff[0] if k == "font-family" else v) for k, v in font.iteritems() if k in keys))
                    val = fu[key]
                    if not val:
                        # New record: the stored family is the first family as
                        # given in the usage entry (not lower-cased).
                        val.update({k: (font[k][0] if k == "font-family" else font[k]) for k in keys})
                        val["text"] = set()
                    val["text"] |= text
        # Store a plain dict instead of the defaultdict.
        self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu)

        if self.do_embed:
            self.page.evaljs("window.font_stats.get_font_families()")
            font_families = self.page.bridge_value
            if not isinstance(font_families, dict):
                raise Exception("Unknown error occurred while reading font families")
            self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set()
            # Only the keys are used; each is parsed as a font-family value and
            # split into individual names.
            for raw in font_families.iterkeys():
                style = self.parser.parseStyle("font-family:" + raw, validate=False).getProperty("font-family")
                for x in style.propertyValue:
                    x = x.value
                    # Skip empty values and the names listed in bad_fonts.
                    if x and x.lower() not in bad_fonts:
                        fs.add(x)