def _format_css_rules(content: list, indent_level: int) -> str:
    """
    Helper function for CSS formatting that formats a list of CSS selectors.

    INPUTS
    content: A list of component values generated by the tinycss2 library
    indent_level: Number of tab characters to prefix each emitted rule with

    OUTPUTS
    A string of formatted CSS

    RAISES
    se.InvalidCssException: if tinycss2 yields an error token while parsing
    """

    output = ""
    indent = "\t" * indent_level

    for token in tinycss2.parse_rule_list(content):
        if token.type == "error":
            # The message must be an f-string, otherwise the placeholder is
            # emitted literally instead of the parser's error text.
            raise se.InvalidCssException(f"Couldn’t parse CSS. Exception: {token.message}")

        if token.type == "qualified-rule":
            # Multi-line selectors keep the current indent on every line.
            output += (
                indent
                + _format_css_component_list(token.prelude, True).replace("\n", "\n" + indent)
                + "{\n"
                + _format_css_declarations(token.content, indent_level + 1)
                + "\n" + indent + "}\n\n"
            )

        if token.type == "at-rule":
            # At-rule preludes are flattened onto one line; the body is
            # formatted recursively one level deeper.
            output += (
                indent + "@" + token.lower_at_keyword + " "
                + _format_css_component_list(token.prelude, True).replace("\n", " ")
                + "{\n"
                + _format_css_rules(token.content, indent_level + 1)
                + "\n" + indent + "}\n\n"
            )

        if token.type == "comment":
            is_end_comment = token.value.strip().lower().startswith("end")

            # House style: If the comment starts with /* End, then attach it to the previous block
            if is_end_comment:
                output = output.rstrip() + "\n"

            output += indent + "/* " + token.value.strip() + " */\n"

            if is_end_comment:
                output += "\n"

    return output.rstrip()
def _find_rules_selectors(self, rule):
    """
    Collect the selectors used by a parsed CSS rule.

    At-rules with content are parsed as a rule list and searched
    recursively. Qualified rules contribute the comma-separated pieces of
    the text that precedes the first ``{`` of their serialization (the
    pieces are not stripped). Every other rule type yields an empty list.

    :param rule: rule to find selectors
    :return: list of selectors found
    """
    kind = rule.type

    # At-rule with a non-empty body: gather selectors of the nested rules.
    if kind == 'at-rule' and rule.content:
        nested_selectors = []
        for inner_rule in tinycss2.parse_rule_list(rule.content):
            nested_selectors.extend(self._find_rules_selectors(inner_rule))
        return nested_selectors

    if kind == 'qualified-rule':
        prelude_text = rule.serialize().partition('{')[0]
        return prelude_text.split(',')

    # Comments, errors, whitespace and any rule type not covered above
    # carry no selectors.
    return []
def _remove_unused_selectors_from_rule(self, rule, unused):
    """
    Return the CSS text of ``rule`` with the ``unused`` selectors removed.

    ``@media`` rules are processed recursively and rebuilt from whatever
    nested rules remain; an emptied ``@media`` block yields ``''``. For any
    other rule the serialized text is edited in place with string
    replacement, and ``''`` is returned when no selector survives.

    :param rule: parsed rule exposing ``type``, ``serialize()`` and, for
        at-rules, ``lower_at_keyword``, ``prelude`` and ``content``
    :param unused: container of selector strings to drop
    :return: the remaining CSS text, or ``''`` if nothing is left
    :raises SystemExit: with status 1 when the edited rule is rejected by
        ``self._is_css_valid``
    """
    if rule.type == 'at-rule' and rule.lower_at_keyword == 'media':
        # recursion to deal with @media elements
        inner_css = ''.join(
            self._remove_unused_selectors_from_rule(nested, unused)
            for nested in tinycss2.parse_rule_list(rule.content))
        if not inner_css:
            return ''
        # reconstruct media CSS since at least one rule is left inside @media
        prelude_css = ''.join(node.serialize() for node in rule.prelude)
        return '@media' + prelude_css + '{' + inner_css + '}'

    rule_css = rule.serialize()
    selectors = self._get_all_selectors(rule_css)
    selectors_left = [sel for sel in selectors if sel not in unused]

    for sel in selectors:
        if sel not in unused:
            continue
        # .nav,.sidebar li,.thumbnails{list-style:none}
        # remove class: .thumbnails
        rule_css = rule_css.replace(',' + sel + '{', '{')
        # remove class: .sidebar li or .nav
        rule_css = rule_css.replace(sel + ',', '')
        # remove class: .nav{
        rule_css = rule_css.replace(sel + '{', '{')

    # No selector survived: the whole rule is dropped.
    if not selectors_left:
        return ''

    if not self._is_css_valid(rule_css):
        logger.error(
            'Invalid CSS rule detected after removing unused classes!')
        logger.error(rule_css)
        # ``exit()`` is injected by the ``site`` module for interactive use
        # and exits with status 0; raise SystemExit directly with a failing
        # status instead.
        raise SystemExit(1)

    return rule_css
def gen_js_style_nodes(self, node_type, styles):
    """
    Collect the top-level at-rules named ``node_type`` from ``styles``.

    Each element of ``styles`` has its ``text`` parsed as a CSS rule list;
    every ``tinycss2.ast.AtRule`` whose lower-cased keyword equals
    ``node_type`` is returned, in document order.
    """
    return [
        node
        for style in styles
        for node in tinycss2.parse_rule_list(style.text)
        if isinstance(node, tinycss2.ast.AtRule)
        and node.lower_at_keyword == node_type
    ]
def validate_at_rule(self, rule):
    """
    Validate an at-rule, dispatching on its vendor-prefix-stripped keyword.

    ``media`` and ``keyframes`` have their content parsed as a rule list and
    validated with ``validate_rule_list``; ``page`` is validated like a
    qualified rule. In both cases the result is the prelude errors chained
    with the rule errors. Any other keyword returns a single
    ``ValidationError`` with code ``UNKNOWN_AT_RULE``.
    """
    prelude_errors = self.validate_component_values(rule.prelude)
    keyword = strip_vendor_prefix(rule.lower_at_keyword)

    if keyword == "page":
        rule_errors = self.validate_qualified_rule(rule)
    elif keyword in ("media", "keyframes"):
        nested_rules = tinycss2.parse_rule_list(rule.content)
        rule_errors = self.validate_rule_list(nested_rules)
    else:
        # Unsupported at-rule: reported as one error object, not an iterable.
        return ValidationError(
            rule.source_line,
            "UNKNOWN_AT_RULE",
            {"keyword": rule.at_keyword},
        )

    return itertools.chain(prelude_errors, rule_errors)
def find_urls_in_css_rules(name, css_rules):
    """
    Find the url of the given image name in the given style.

    Qualified rules are passed to ``_get_url_if_contains_token_name``; any
    truthy result is formatted with ``_format_url`` and collected. ``@media``
    blocks (keyword matched case-insensitively) are parsed and searched
    recursively. All other rules are ignored.

    :param name: token name to look for
    :param css_rules: iterable of rules parsed by tinycss2
    :return: list of formatted urls, in document order

    >>> stylesheet = get_embedded_styles(get_html_tree('https://www.apple.com/retail/fifthavenue/'), 'image-hero')[0]
    >>> css_rules = tinycss2.parse_stylesheet(stylesheet, skip_comments=True, skip_whitespace=True)
    >>> find_urls_in_css_rules('image-hero', css_rules)[1]
    'https://www.apple.com/retail/fifthavenue/images/hero_large_2x.jpg'
    """
    urls = []
    for rule in css_rules:
        if rule.type == 'qualified-rule':
            url = _get_url_if_contains_token_name(name, rule)
            if url:
                urls.append(_format_url(url))
        # At-keywords are case-insensitive in CSS, so compare the lower-cased
        # form; ``at_keyword`` keeps the author's spelling (e.g. ``MEDIA``).
        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'media':
            nested_rules = tinycss2.parse_rule_list(rule.content, skip_whitespace=True)
            urls.extend(find_urls_in_css_rules(name, nested_rules))
    return urls
def extract_inline_style_source(self, style):
    """
    Extracts sources for an inline style tag

    Parses the text of an inline style tag and hands every ``@import`` rule
    to ``find_import_source`` and every ``@font-face`` rule to
    ``find_fontface_source``, which are relevant to the style-src directive.

    :param style: style tag to extract the source from
    :return: None
    """
    for node in tinycss2.parse_rule_list(style.text):
        if not isinstance(node, tinycss2.ast.AtRule):
            continue

        # NOTE(review): these prints look like leftover debugging output;
        # they are kept as-is so stdout stays unchanged.
        print(node, type(node))
        keyword = node.lower_at_keyword
        if keyword == 'import':
            print('import')
            self.find_import_source(node)
        elif keyword == 'font-face':
            print('font-face')
            self.find_fontface_source(node)
def __init__(self, rule, parent_style_sheet=None, parent_rule=None):
    """Constructs a CSSMediaRule object.

    The rule prelude is serialized into the media list text, and the rule
    content is parsed (comments and whitespace skipped) into child rules
    whose parent rule is this object.

    Arguments:
        rule: A parsed CSS at-rule object.
        parent_style_sheet (CSSStyleSheet, optional): The parent CSS style
            sheet.
        parent_rule (CSSRule, optional): The parent CSS rule.
    """
    super().__init__(
        rule,
        CSSRule.MEDIA_RULE,
        parent_style_sheet=parent_style_sheet,
        parent_rule=parent_rule,
    )

    # Media query list taken verbatim from the at-rule prelude.
    self._media = MediaList()
    self._media.media_text = tinycss2.serialize(rule.prelude)

    # Nested rules of the @media block.
    nested_nodes = tinycss2.parse_rule_list(
        rule.content, skip_comments=True, skip_whitespace=True)
    self.css_rules.extend(
        CSSParser.parse_rules(
            nested_nodes,
            parent_style_sheet=parent_style_sheet,
            parent_rule=self,
        )
    )
def _parse_content(self, content):
    """Parse nested feature blocks and record their values on ``self``.

    ``content`` is parsed as a rule list (comments and whitespace skipped).
    For every at-rule found, its content tokens are scanned for
    ``<ident> : <non-negative integers> ;`` sequences. Each completed
    sequence is stored as ``{name: [ints]}`` in the mapping that matches the
    at-rule keyword: ``annotation``, ``character-variant``, ``ornaments``,
    ``styleset``, ``stylistic`` or ``swash``. Other keywords are scanned but
    nothing is stored for them.
    """
    nodes = tinycss2.parse_rule_list(content, skip_comments=True, skip_whitespace=True)
    for node in nodes:
        if node.type == 'at-rule':
            # tinycss2.parse_one_component_value() returns ParseError
            feature_type = node.lower_at_keyword
            # Parser state for the declaration currently being read.
            name = None
            feature_values = None
            for token in node.content:
                if token.type == 'ident':
                    # Start (or restart) a declaration with this name.
                    name = token.value
                elif token.type == 'literal' and token.value == ':':
                    # A colon only opens a value list once a name was seen.
                    if name is not None:
                        feature_values = list()
                elif token.type == 'number' and token.is_integer:
                    # Only non-negative integers after the colon are kept.
                    if feature_values is not None and token.int_value >= 0:
                        feature_values.append(token.int_value)
                elif token.type == 'literal' and token.value == ';':
                    # End of declaration: store it if it has a name and at
                    # least one value.
                    if (name is not None
                            and feature_values is not None
                            and len(feature_values) > 0):
                        if feature_type == 'annotation':
                            self._annotation[name] = feature_values
                        elif feature_type == 'character-variant':
                            self._character_variant[name] = feature_values
                        elif feature_type == 'ornaments':
                            self._ornaments[name] = feature_values
                        elif feature_type == 'styleset':
                            self._styleset[name] = feature_values
                        elif feature_type == 'stylistic':
                            self._stylistic[name] = feature_values
                        elif feature_type == 'swash':
                            self._swash[name] = feature_values
                    # NOTE(review): the source this was reformatted from had
                    # lost its indentation; the reset is assumed to run on
                    # every ';' rather than only after a stored declaration
                    # -- confirm against the original file.
                    name = None
                    feature_values = None
def preprocess_stylesheet(device_media_type, base_url, stylesheet_rules,
                          url_fetcher, matcher, page_rules, fonts,
                          font_config, ignore_imports=False):
    """Do the work that can be done early on stylesheet, before they are
    in a document.

    Walks ``stylesheet_rules`` and handles each rule by kind:

    * qualified rules: compiled selectors are added to ``matcher`` together
      with the preprocessed declarations;
    * ``@import``: the target is loaded through ``CSS`` with the same
      ``matcher`` and ``page_rules``, unless imports are being ignored or
      the media query does not match ``device_media_type``;
    * ``@media``: nested rules are processed recursively when the media
      query matches;
    * ``@page``: ``(rule, selector_list, declarations)`` tuples are appended
      to ``page_rules`` for the page itself and for each margin at-rule;
    * ``@font-face``: descriptors are handed to
      ``font_config.add_font_face`` (when ``font_config`` is not None) and
      any returned file name is appended to ``fonts``.

    ``ignore_imports`` is switched on as soon as a rule other than
    ``@import`` has been accepted; later ``@import`` rules are logged and
    skipped. Nothing is returned; results are delivered through the mutable
    arguments.
    """
    for rule in stylesheet_rules:
        # Rules without a block are skipped; @import is exempt from this
        # check.
        if getattr(rule, 'content', None) is None and (
                rule.type != 'at-rule' or rule.lower_at_keyword != 'import'):
            continue

        if rule.type == 'qualified-rule':
            declarations = list(preprocess_declarations(
                base_url, tinycss2.parse_declaration_list(rule.content)))
            if declarations:
                logger_level = WARNING
                try:
                    selectors = cssselect2.compile_selector_list(rule.prelude)
                    for selector in selectors:
                        matcher.add_selector(selector, declarations)
                        if selector.pseudo_element not in PSEUDO_ELEMENTS:
                            if selector.pseudo_element.startswith('-'):
                                # Prefixed pseudo-elements are only reported
                                # at debug level.
                                logger_level = DEBUG
                                raise cssselect2.SelectorError(
                                    'ignored prefixed pseudo-element: %s'
                                    % selector.pseudo_element)
                            else:
                                raise cssselect2.SelectorError(
                                    'unknown pseudo-element: %s'
                                    % selector.pseudo_element)
                    ignore_imports = True
                except cssselect2.SelectorError as exc:
                    LOGGER.log(
                        logger_level,
                        "Invalid or unsupported selector '%s', %s",
                        tinycss2.serialize(rule.prelude), exc)
                    continue
            else:
                ignore_imports = True

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'import':
            if ignore_imports:
                # Message fixed: it used to read "beginning of the the whole
                # rule", with the noun missing.
                LOGGER.warning('@import rule "%s" not at the beginning of the '
                               'stylesheet, the whole rule was ignored at '
                               '%s:%s.',
                               tinycss2.serialize(rule.prelude),
                               rule.source_line, rule.source_column)
                continue

            tokens = remove_whitespace(rule.prelude)
            if tokens and tokens[0].type in ('url', 'string'):
                url = tokens[0].value
            else:
                continue
            # Everything after the URL is the media query of the import.
            media = media_queries.parse_media_query(tokens[1:])
            if media is None:
                LOGGER.warning('Invalid media type "%s" '
                               'the whole @import rule was ignored at %s:%s.',
                               tinycss2.serialize(rule.prelude),
                               rule.source_line, rule.source_column)
                continue
            if not media_queries.evaluate_media_query(
                    media, device_media_type):
                continue
            url = url_join(
                base_url, url, allow_relative=False,
                context='@import at %s:%s',
                context_args=(rule.source_line, rule.source_column))
            if url is not None:
                try:
                    # The return value is unused: the imported stylesheet
                    # receives the same matcher and page_rules objects.
                    CSS(
                        url=url, url_fetcher=url_fetcher,
                        media_type=device_media_type, font_config=font_config,
                        matcher=matcher, page_rules=page_rules)
                except URLFetchingError as exc:
                    LOGGER.error(
                        'Failed to load stylesheet at %s : %s', url, exc)

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'media':
            media = media_queries.parse_media_query(rule.prelude)
            if media is None:
                LOGGER.warning('Invalid media type "%s" '
                               'the whole @media rule was ignored at %s:%s.',
                               tinycss2.serialize(rule.prelude),
                               rule.source_line, rule.source_column)
                continue
            ignore_imports = True
            if not media_queries.evaluate_media_query(
                    media, device_media_type):
                continue
            content_rules = tinycss2.parse_rule_list(rule.content)
            preprocess_stylesheet(
                device_media_type, base_url, content_rules, url_fetcher,
                matcher, page_rules, fonts, font_config, ignore_imports=True)

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'page':
            data = parse_page_selectors(rule)

            if data is None:
                LOGGER.warning(
                    'Unsupported @page selector "%s", '
                    'the whole @page rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude),
                    rule.source_line, rule.source_column)
                continue

            ignore_imports = True
            for page_type in data:
                specificity = page_type.pop('specificity')
                page_type = PageType(**page_type)
                content = tinycss2.parse_declaration_list(rule.content)
                declarations = list(preprocess_declarations(base_url, content))

                if declarations:
                    selector_list = [(specificity, None, page_type)]
                    page_rules.append((rule, selector_list, declarations))

                # Nested at-rules with a block are the page margin rules.
                for margin_rule in content:
                    if margin_rule.type != 'at-rule' or (
                            margin_rule.content is None):
                        continue
                    declarations = list(preprocess_declarations(
                        base_url,
                        tinycss2.parse_declaration_list(margin_rule.content)))
                    if declarations:
                        selector_list = [(
                            specificity, '@' + margin_rule.lower_at_keyword,
                            page_type)]
                        page_rules.append(
                            (margin_rule, selector_list, declarations))

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'font-face':
            ignore_imports = True
            content = tinycss2.parse_declaration_list(rule.content)
            rule_descriptors = dict(preprocess_descriptors(base_url, content))
            for key in ('src', 'font_family'):
                if key not in rule_descriptors:
                    LOGGER.warning(
                        "Missing %s descriptor in '@font-face' rule at %s:%s",
                        key.replace('_', '-'),
                        rule.source_line, rule.source_column)
                    break
            else:
                # Reached only when both required descriptors are present.
                if font_config is not None:
                    font_filename = font_config.add_font_face(
                        rule_descriptors, url_fetcher)
                    if font_filename:
                        fonts.append(font_filename)
def preprocess_stylesheet(device_media_type, base_url, stylesheet_rules,
                          url_fetcher, matcher, page_rules, fonts,
                          font_config, counter_style, ignore_imports=False):
    """Do the work that can be done early on stylesheet, before they are
    in a document.

    Walks ``stylesheet_rules`` and handles each rule by kind:

    * qualified rules: compiled selectors are added to ``matcher`` together
      with the preprocessed declarations;
    * ``@import``: the target is loaded through ``CSS`` with the same
      ``matcher``, ``page_rules`` and ``counter_style``, unless imports are
      being ignored or the media query does not match
      ``device_media_type``;
    * ``@media``: nested rules are processed recursively when the media
      query matches;
    * ``@page``: ``(rule, selector_list, declarations)`` tuples are appended
      to ``page_rules`` for the page itself and for each margin at-rule;
    * ``@font-face``: descriptors are handed to
      ``font_config.add_font_face`` (when ``font_config`` is not None) and
      any returned file name is appended to ``fonts``;
    * ``@counter-style``: a descriptor dict is stored in
      ``counter_style[name]`` once its symbol counts pass the checks below.

    ``ignore_imports`` is switched on as soon as a rule other than
    ``@import`` has been accepted; later ``@import`` rules are logged and
    skipped. Nothing is returned; results are delivered through the mutable
    arguments.
    """
    for rule in stylesheet_rules:
        # Rules without a block are skipped; @import is exempt from this
        # check.
        if getattr(rule, 'content', None) is None and (
                rule.type != 'at-rule' or rule.lower_at_keyword != 'import'):
            continue

        if rule.type == 'qualified-rule':
            declarations = list(
                preprocess_declarations(
                    base_url, tinycss2.parse_declaration_list(rule.content)))
            if declarations:
                logger_level = WARNING
                try:
                    selectors = cssselect2.compile_selector_list(rule.prelude)
                    for selector in selectors:
                        matcher.add_selector(selector, declarations)
                        if selector.pseudo_element not in PSEUDO_ELEMENTS:
                            if selector.pseudo_element.startswith('-'):
                                # Prefixed pseudo-elements are only reported
                                # at debug level.
                                logger_level = DEBUG
                                raise cssselect2.SelectorError(
                                    'ignored prefixed pseudo-element: '
                                    f'{selector.pseudo_element}')
                            else:
                                raise cssselect2.SelectorError(
                                    'unknown pseudo-element: '
                                    f'{selector.pseudo_element}')
                    ignore_imports = True
                except cssselect2.SelectorError as exc:
                    LOGGER.log(logger_level,
                               "Invalid or unsupported selector '%s', %s",
                               tinycss2.serialize(rule.prelude), exc)
                    continue
            else:
                ignore_imports = True

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'import':
            if ignore_imports:
                # Message fixed: it used to read "beginning of the the whole
                # rule", with the noun missing.
                LOGGER.warning(
                    '@import rule %r not at the beginning of the '
                    'stylesheet, the whole rule was ignored at %d:%d.',
                    tinycss2.serialize(rule.prelude),
                    rule.source_line, rule.source_column)
                continue

            tokens = remove_whitespace(rule.prelude)
            url = None
            if tokens:
                if tokens[0].type == 'string':
                    url = url_join(base_url, tokens[0].value,
                                   allow_relative=False,
                                   context='@import at %s:%s',
                                   context_args=(rule.source_line,
                                                 rule.source_column))
                else:
                    # Non-string first token: only an 'external' result from
                    # get_url is accepted.
                    url_tuple = get_url(tokens[0], base_url)
                    if url_tuple and url_tuple[1][0] == 'external':
                        url = url_tuple[1][1]
            if url is None:
                continue
            # Everything after the URL is the media query of the import.
            media = media_queries.parse_media_query(tokens[1:])
            if media is None:
                LOGGER.warning(
                    'Invalid media type %r '
                    'the whole @import rule was ignored at %d:%d.',
                    tinycss2.serialize(rule.prelude),
                    rule.source_line, rule.source_column)
                continue
            if not media_queries.evaluate_media_query(media,
                                                      device_media_type):
                continue
            if url is not None:
                try:
                    # The return value is unused: the imported stylesheet
                    # receives the same matcher, page_rules and
                    # counter_style objects.
                    CSS(url=url,
                        url_fetcher=url_fetcher,
                        media_type=device_media_type,
                        font_config=font_config,
                        counter_style=counter_style,
                        matcher=matcher,
                        page_rules=page_rules)
                except URLFetchingError as exc:
                    LOGGER.error('Failed to load stylesheet at %s : %s',
                                 url, exc)

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'media':
            media = media_queries.parse_media_query(rule.prelude)
            if media is None:
                LOGGER.warning(
                    'Invalid media type %r '
                    'the whole @media rule was ignored at %d:%d.',
                    tinycss2.serialize(rule.prelude),
                    rule.source_line, rule.source_column)
                continue
            ignore_imports = True
            if not media_queries.evaluate_media_query(media,
                                                      device_media_type):
                continue
            content_rules = tinycss2.parse_rule_list(rule.content)
            preprocess_stylesheet(device_media_type,
                                  base_url,
                                  content_rules,
                                  url_fetcher,
                                  matcher,
                                  page_rules,
                                  fonts,
                                  font_config,
                                  counter_style,
                                  ignore_imports=True)

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'page':
            data = parse_page_selectors(rule)

            if data is None:
                LOGGER.warning(
                    'Unsupported @page selector %r, '
                    'the whole @page rule was ignored at %d:%d.',
                    tinycss2.serialize(rule.prelude),
                    rule.source_line, rule.source_column)
                continue

            ignore_imports = True
            for page_type in data:
                specificity = page_type.pop('specificity')
                page_type = PageType(**page_type)
                content = tinycss2.parse_declaration_list(rule.content)
                declarations = list(preprocess_declarations(base_url, content))

                if declarations:
                    selector_list = [(specificity, None, page_type)]
                    page_rules.append((rule, selector_list, declarations))

                # Nested at-rules with a block are the page margin rules.
                for margin_rule in content:
                    if margin_rule.type != 'at-rule' or (
                            margin_rule.content is None):
                        continue
                    declarations = list(
                        preprocess_declarations(
                            base_url,
                            tinycss2.parse_declaration_list(
                                margin_rule.content)))
                    if declarations:
                        selector_list = [
                            (specificity,
                             f'@{margin_rule.lower_at_keyword}',
                             page_type)
                        ]
                        page_rules.append(
                            (margin_rule, selector_list, declarations))

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'font-face':
            ignore_imports = True
            content = tinycss2.parse_declaration_list(rule.content)
            rule_descriptors = dict(
                preprocess_descriptors('font-face', base_url, content))
            for key in ('src', 'font_family'):
                if key not in rule_descriptors:
                    LOGGER.warning(
                        "Missing %s descriptor in '@font-face' rule at %d:%d",
                        key.replace('_', '-'),
                        rule.source_line, rule.source_column)
                    break
            else:
                # Reached only when both required descriptors are present.
                if font_config is not None:
                    font_filename = font_config.add_font_face(
                        rule_descriptors, url_fetcher)
                    if font_filename:
                        fonts.append(font_filename)

        elif (rule.type == 'at-rule'
              and rule.lower_at_keyword == 'counter-style'):
            name = counters.parse_counter_style_name(rule.prelude,
                                                     counter_style)
            if name is None:
                LOGGER.warning(
                    'Invalid counter style name %r, the whole '
                    '@counter-style rule was ignored at %d:%d.',
                    tinycss2.serialize(rule.prelude),
                    rule.source_line, rule.source_column)
                continue

            ignore_imports = True
            content = tinycss2.parse_declaration_list(rule.content)
            # Every known descriptor starts as None and is overwritten by
            # whatever the rule declares.
            counter = {
                'system': None,
                'negative': None,
                'prefix': None,
                'suffix': None,
                'range': None,
                'pad': None,
                'fallback': None,
                'symbols': None,
                'additive_symbols': None,
            }
            rule_descriptors = dict(
                preprocess_descriptors('counter-style', base_url, content))

            for descriptor_name, descriptor_value in rule_descriptors.items():
                counter[descriptor_name] = descriptor_value

            # A missing system is checked as if it were 'symbolic'.
            if counter['system'] is None:
                system = (None, 'symbolic', None)
            else:
                system = counter['system']

            # Symbol-count checks apply only when the first element of the
            # system tuple is None.
            if system[0] is None:
                if system[1] in ('cyclic', 'fixed', 'symbolic'):
                    if len(counter['symbols'] or []) < 1:
                        LOGGER.warning(
                            'In counter style %r at %d:%d, '
                            'counter style %r needs at least one symbol',
                            name, rule.source_line, rule.source_column,
                            system[1])
                        continue
                elif system[1] in ('alphabetic', 'numeric'):
                    if len(counter['symbols'] or []) < 2:
                        LOGGER.warning(
                            'In counter style %r at %d:%d, '
                            'counter style %r needs at least two symbols',
                            name, rule.source_line, rule.source_column,
                            system[1])
                        continue
                elif system[1] == 'additive':
                    if len(counter['additive_symbols'] or []) < 2:
                        LOGGER.warning(
                            'In counter style %r at %d:%d, '
                            'counter style "additive" '
                            'needs at least two additive symbols',
                            name, rule.source_line, rule.source_column)
                        continue

            counter_style[name] = counter
def test_bad_unicode():
    """Feed a lone surrogate to both parsers; neither call may raise."""
    cases = (
        (parse_one_declaration, 'background:\udca9'),
        (parse_rule_list, '@\udca9'),
    )
    for parse, css in cases:
        parse(css)
def test_serialize_rules():
    """Parsing a rule list and serializing it again must round-trip."""
    css = '@import "a.css"; foo#bar.baz { color: red } /**/ @media print{}'
    assert serialize(parse_rule_list(css)) == css
def test_rule_list(input):
    """Parse ``input`` as a rule list using the shared ``SKIP`` options."""
    parsed_rules = parse_rule_list(input, **SKIP)
    return parsed_rules
def preprocess_stylesheet(device_media_type, base_url, stylesheet_rules,
                          url_fetcher, matcher, page_rules, fonts,
                          font_config, ignore_imports=False):
    """Do the work that can be done early on stylesheet, before they are
    in a document.

    Walks ``stylesheet_rules`` and handles each rule by kind:

    * qualified rules: compiled selectors are added to ``matcher`` together
      with the preprocessed declarations;
    * ``@import``: the target is loaded through ``CSS`` with the same
      ``matcher`` and ``page_rules``, unless imports are being ignored or
      the media query does not match ``device_media_type``;
    * ``@media``: nested rules are processed recursively when the media
      query matches;
    * ``@page``: ``(rule, selector_list, declarations)`` tuples are appended
      to ``page_rules`` for the page itself and for each margin at-rule;
      the third selector element is a callable taking the page names;
    * ``@font-face``: descriptors are handed to
      ``font_config.add_font_face`` (when ``font_config`` is not None) and
      any returned file name is appended to ``fonts``.

    ``ignore_imports`` is switched on as soon as a rule other than
    ``@import`` has been accepted; later ``@import`` rules are logged and
    skipped. Nothing is returned; results are delivered through the mutable
    arguments.
    """
    for rule in stylesheet_rules:
        # Rules without a block are skipped; @import is exempt from this
        # check.
        if getattr(rule, 'content', None) is None and (
                rule.type != 'at-rule' or rule.lower_at_keyword != 'import'):
            continue

        if rule.type == 'qualified-rule':
            declarations = list(
                preprocess_declarations(
                    base_url, tinycss2.parse_declaration_list(rule.content)))
            if declarations:
                logger_level = WARNING
                try:
                    selectors = cssselect2.compile_selector_list(rule.prelude)
                    for selector in selectors:
                        matcher.add_selector(selector, declarations)
                        if selector.pseudo_element not in PSEUDO_ELEMENTS:
                            if selector.pseudo_element.startswith('-'):
                                # Prefixed pseudo-elements are only reported
                                # at debug level.
                                logger_level = DEBUG
                                raise cssselect2.SelectorError(
                                    'ignored prefixed pseudo-element: %s'
                                    % selector.pseudo_element)
                            else:
                                raise cssselect2.SelectorError(
                                    'unknown pseudo-element: %s'
                                    % selector.pseudo_element)
                    ignore_imports = True
                except cssselect2.SelectorError as exc:
                    LOGGER.log(logger_level,
                               "Invalid or unsupported selector '%s', %s",
                               tinycss2.serialize(rule.prelude), exc)
                    continue
            else:
                ignore_imports = True

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'import':
            if ignore_imports:
                # Message fixed: it used to read "beginning of the the whole
                # rule", with the noun missing.
                LOGGER.warning(
                    '@import rule "%s" not at the beginning of the '
                    'stylesheet, the whole rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude),
                    rule.source_line, rule.source_column)
                continue

            tokens = remove_whitespace(rule.prelude)
            if tokens and tokens[0].type in ('url', 'string'):
                url = tokens[0].value
            else:
                continue
            # Everything after the URL is the media query of the import.
            media = parse_media_query(tokens[1:])
            if media is None:
                LOGGER.warning(
                    'Invalid media type "%s" '
                    'the whole @import rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude),
                    rule.source_line, rule.source_column)
                continue
            if not evaluate_media_query(media, device_media_type):
                continue
            url = url_join(base_url, url, allow_relative=False,
                           context='@import at %s:%s',
                           context_args=(rule.source_line,
                                         rule.source_column))
            if url is not None:
                try:
                    # The return value is unused: the imported stylesheet
                    # receives the same matcher and page_rules objects.
                    CSS(url=url,
                        url_fetcher=url_fetcher,
                        media_type=device_media_type,
                        font_config=font_config,
                        matcher=matcher,
                        page_rules=page_rules)
                except URLFetchingError as exc:
                    LOGGER.error('Failed to load stylesheet at %s : %s',
                                 url, exc)

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'media':
            media = parse_media_query(rule.prelude)
            if media is None:
                LOGGER.warning(
                    'Invalid media type "%s" '
                    'the whole @media rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude),
                    rule.source_line, rule.source_column)
                continue
            ignore_imports = True
            if not evaluate_media_query(media, device_media_type):
                continue
            content_rules = tinycss2.parse_rule_list(rule.content)
            preprocess_stylesheet(device_media_type,
                                  base_url,
                                  content_rules,
                                  url_fetcher,
                                  matcher,
                                  page_rules,
                                  fonts,
                                  font_config,
                                  ignore_imports=True)

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'page':
            data = parse_page_selectors(rule)

            if data is None:
                LOGGER.warning(
                    'Unsupported @page selector "%s", '
                    'the whole @page rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude),
                    rule.source_line, rule.source_column)
                continue

            ignore_imports = True
            for page_type in data:
                specificity = page_type.pop('specificity')
                page_type = PageType(**page_type)
                # Use a double lambda to have a closure that holds page_types
                match = (lambda page_type: lambda page_names: list(
                    matching_page_types(page_type, names=page_names))
                )(page_type)
                content = tinycss2.parse_declaration_list(rule.content)
                declarations = list(preprocess_declarations(base_url, content))

                if declarations:
                    selector_list = [(specificity, None, match)]
                    page_rules.append((rule, selector_list, declarations))

                # Nested at-rules with a block are the page margin rules.
                for margin_rule in content:
                    if margin_rule.type != 'at-rule' or (
                            margin_rule.content is None):
                        continue
                    declarations = list(
                        preprocess_declarations(
                            base_url,
                            tinycss2.parse_declaration_list(
                                margin_rule.content)))
                    if declarations:
                        selector_list = [
                            (specificity,
                             '@' + margin_rule.lower_at_keyword,
                             match)
                        ]
                        page_rules.append(
                            (margin_rule, selector_list, declarations))

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'font-face':
            ignore_imports = True
            content = tinycss2.parse_declaration_list(rule.content)
            rule_descriptors = dict(preprocess_descriptors(base_url, content))
            for key in ('src', 'font_family'):
                if key not in rule_descriptors:
                    LOGGER.warning(
                        "Missing %s descriptor in '@font-face' rule at %s:%s",
                        key.replace('_', '-'),
                        rule.source_line, rule.source_column)
                    break
            else:
                # Reached only when both required descriptors are present.
                if font_config is not None:
                    font_filename = font_config.add_font_face(
                        rule_descriptors, url_fetcher)
                    if font_filename:
                        fonts.append(font_filename)
def preprocess_stylesheet(device_media_type, base_url, stylesheet_rules,
                          url_fetcher, rules, fonts, font_config):
    """Do the work that can be done early on stylesheet, before they are
    in a document.

    Walks ``stylesheet_rules`` and handles each rule by kind:

    * qualified rules: selectors are translated to XPath and appended to
      ``rules`` as ``(rule, selector_list, declarations)``;
    * ``@import``: the target is loaded through ``CSS`` and its ``rules``
      are appended to ``rules`` when the media query matches
      ``device_media_type``;
    * ``@media``: nested rules are processed recursively when the media
      query matches;
    * ``@page``: entries for the page itself and for each margin at-rule
      are appended to ``rules``;
    * ``@font-face``: descriptors are handed to
      ``font_config.add_font_face`` (when ``font_config`` is not None) and
      any returned file name is appended to ``fonts``.

    Nothing is returned; results are delivered through ``rules`` and
    ``fonts``.
    """
    selector_to_xpath = cssselect.HTMLTranslator().selector_to_xpath
    for rule in stylesheet_rules:
        # Rules without a block cannot be parsed by the branches below
        # (their content is None); @import is exempt from this check.
        if getattr(rule, 'content', None) is None and (
                rule.type != 'at-rule' or rule.at_keyword != 'import'):
            continue

        if rule.type == 'qualified-rule':
            declarations = list(
                preprocess_declarations(
                    base_url, tinycss2.parse_declaration_list(rule.content)))
            if declarations:
                selector_string = tinycss2.serialize(rule.prelude)
                try:
                    selector_list = []
                    for selector in cssselect.parse(selector_string):
                        xpath = selector_to_xpath(selector)
                        try:
                            lxml_xpath = lxml.etree.XPath(xpath)
                        except ValueError as exc:
                            # TODO: Some characters are not supported by lxml's
                            # XPath implementation (including control
                            # characters), but these characters are valid in
                            # the CSS2.1 specification.
                            raise cssselect.SelectorError(str(exc))
                        selector_list.append(
                            Selector((0, ) + selector.specificity(),
                                     selector.pseudo_element, lxml_xpath))
                    for selector in selector_list:
                        if selector.pseudo_element not in PSEUDO_ELEMENTS:
                            raise cssselect.ExpressionError(
                                'Unknown pseudo-element: %s' %
                                selector.pseudo_element)
                except cssselect.SelectorError as exc:
                    LOGGER.warning("Invalid or unsupported selector '%s', %s",
                                   selector_string, exc)
                    continue
                rules.append((rule, selector_list, declarations))

        elif rule.type == 'at-rule' and rule.at_keyword == 'import':
            tokens = remove_whitespace(rule.prelude)
            if tokens and tokens[0].type in ('url', 'string'):
                url = tokens[0].value
            else:
                continue
            # Everything after the URL is the media query of the import.
            media = parse_media_query(tokens[1:])
            if media is None:
                LOGGER.warning(
                    'Invalid media type "%s" '
                    'the whole @import rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude),
                    rule.source_line, rule.source_column)
                # Actually ignore the rule, as the message says, instead of
                # passing None on to evaluate_media_query.
                continue
            if not evaluate_media_query(media, device_media_type):
                continue
            url = url_join(base_url, url, allow_relative=False,
                           context='@import at %s:%s',
                           context_args=(rule.source_line,
                                         rule.source_column))
            if url is not None:
                try:
                    stylesheet = CSS(url=url,
                                     url_fetcher=url_fetcher,
                                     media_type=device_media_type,
                                     font_config=font_config)
                except URLFetchingError as exc:
                    LOGGER.warning('Failed to load stylesheet at %s : %s',
                                   url, exc)
                else:
                    for result in stylesheet.rules:
                        rules.append(result)

        elif rule.type == 'at-rule' and rule.at_keyword == 'media':
            media = parse_media_query(rule.prelude)
            if media is None:
                LOGGER.warning(
                    'Invalid media type "%s" '
                    'the whole @media rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude),
                    rule.source_line, rule.source_column)
                continue
            if not evaluate_media_query(media, device_media_type):
                continue
            content_rules = tinycss2.parse_rule_list(rule.content)
            preprocess_stylesheet(device_media_type, base_url, content_rules,
                                  url_fetcher, rules, fonts, font_config)

        elif rule.type == 'at-rule' and rule.at_keyword == 'page':
            tokens = remove_whitespace(rule.prelude)
            # TODO: support named pages (see CSS3 Paged Media)
            if not tokens:
                pseudo_class = None
                specificity = (0, 0)
            elif (len(tokens) == 2 and tokens[0].type == 'literal'
                  and tokens[0].value == ':' and tokens[1].type == 'ident'):
                pseudo_class = tokens[1].lower_value
                specificity = {
                    'first': (1, 0),
                    'blank': (1, 0),
                    'left': (0, 1),
                    'right': (0, 1),
                }.get(pseudo_class)
                if not specificity:
                    LOGGER.warning(
                        'Unknown @page pseudo-class "%s", '
                        'the whole @page rule was ignored '
                        'at %s:%s.', pseudo_class,
                        rule.source_line, rule.source_column)
                    continue
            else:
                LOGGER.warning(
                    'Unsupported @page selector "%s", '
                    'the whole @page rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude),
                    rule.source_line, rule.source_column)
                continue
            content = tinycss2.parse_declaration_list(rule.content)
            declarations = list(preprocess_declarations(base_url, content))

            # Use a double lambda to have a closure that holds page_types
            match = (lambda page_types: lambda _document: page_types)(
                PAGE_PSEUDOCLASS_TARGETS[pseudo_class])

            if declarations:
                selector_list = [Selector(specificity, None, match)]
                rules.append((rule, selector_list, declarations))

            # Nested at-rules with a block are the page margin rules; one
            # without a block has content None and cannot be parsed.
            for margin_rule in content:
                if margin_rule.type != 'at-rule' or (
                        margin_rule.content is None):
                    continue
                declarations = list(
                    preprocess_declarations(
                        base_url,
                        tinycss2.parse_declaration_list(margin_rule.content)))
                if declarations:
                    selector_list = [
                        Selector(specificity, '@' + margin_rule.at_keyword,
                                 match)
                    ]
                    rules.append((margin_rule, selector_list, declarations))

        elif rule.type == 'at-rule' and rule.at_keyword == 'font-face':
            content = tinycss2.parse_declaration_list(rule.content)
            rule_descriptors = dict(preprocess_descriptors(base_url, content))
            for key in ('src', 'font_family'):
                if key not in rule_descriptors:
                    LOGGER.warning(
                        "Missing %s descriptor in '@font-face' rule at %s:%s",
                        key.replace('_', '-'),
                        rule.source_line, rule.source_column)
                    break
            else:
                # Reached only when both required descriptors are present.
                if font_config is not None:
                    font_filename = font_config.add_font_face(
                        rule_descriptors, url_fetcher)
                    if font_filename:
                        fonts.append(font_filename)
def preprocess_stylesheet(device_media_type, base_url, stylesheet_rules,
                          url_fetcher, matcher, page_rules, fonts,
                          font_config):
    """Do the work that can be done early on stylesheet, before they are
    in a document.

    Walks ``stylesheet_rules`` and handles each rule by kind:

    * qualified rules: compiled selectors are added to ``matcher`` together
      with the preprocessed declarations;
    * ``@import``: the target is loaded through ``CSS`` with the same
      ``matcher`` and ``page_rules`` when the media query matches
      ``device_media_type``;
    * ``@media``: nested rules are processed recursively when the media
      query matches;
    * ``@page``: ``(rule, selector_list, declarations)`` tuples are appended
      to ``page_rules`` for the page itself and for each margin at-rule;
    * ``@font-face``: descriptors are handed to
      ``font_config.add_font_face`` (when ``font_config`` is not None) and
      any returned file name is appended to ``fonts``.

    Nothing is returned; results are delivered through the mutable
    arguments.
    """
    for rule in stylesheet_rules:
        # Rules without a block cannot be parsed by the branches below
        # (their content is None); @import is exempt from this check.
        if getattr(rule, 'content', None) is None and (
                rule.type != 'at-rule' or rule.at_keyword != 'import'):
            continue

        if rule.type == 'qualified-rule':
            declarations = list(
                preprocess_declarations(
                    base_url, tinycss2.parse_declaration_list(rule.content)))
            if declarations:
                try:
                    selectors = cssselect2.compile_selector_list(rule.prelude)
                    for selector in selectors:
                        matcher.add_selector(selector, declarations)
                        if selector.pseudo_element not in PSEUDO_ELEMENTS:
                            raise cssselect2.SelectorError(
                                'Unknown pseudo-element: %s' %
                                selector.pseudo_element)
                except cssselect2.SelectorError as exc:
                    LOGGER.warning("Invalid or unsupported selector '%s', %s",
                                   tinycss2.serialize(rule.prelude), exc)
                    continue

        elif rule.type == 'at-rule' and rule.at_keyword == 'import':
            tokens = remove_whitespace(rule.prelude)
            if tokens and tokens[0].type in ('url', 'string'):
                url = tokens[0].value
            else:
                continue
            # Everything after the URL is the media query of the import.
            media = parse_media_query(tokens[1:])
            if media is None:
                LOGGER.warning(
                    'Invalid media type "%s" '
                    'the whole @import rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude),
                    rule.source_line, rule.source_column)
                # Actually ignore the rule, as the message says, instead of
                # passing None on to evaluate_media_query.
                continue
            if not evaluate_media_query(media, device_media_type):
                continue
            url = url_join(base_url, url, allow_relative=False,
                           context='@import at %s:%s',
                           context_args=(rule.source_line,
                                         rule.source_column))
            if url is not None:
                try:
                    # The return value is unused: the imported stylesheet
                    # receives the same matcher and page_rules objects.
                    CSS(url=url,
                        url_fetcher=url_fetcher,
                        media_type=device_media_type,
                        font_config=font_config,
                        matcher=matcher,
                        page_rules=page_rules)
                except URLFetchingError as exc:
                    LOGGER.error('Failed to load stylesheet at %s : %s',
                                 url, exc)

        elif rule.type == 'at-rule' and rule.at_keyword == 'media':
            media = parse_media_query(rule.prelude)
            if media is None:
                LOGGER.warning(
                    'Invalid media type "%s" '
                    'the whole @media rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude),
                    rule.source_line, rule.source_column)
                continue
            if not evaluate_media_query(media, device_media_type):
                continue
            content_rules = tinycss2.parse_rule_list(rule.content)
            preprocess_stylesheet(device_media_type, base_url, content_rules,
                                  url_fetcher, matcher, page_rules, fonts,
                                  font_config)

        elif rule.type == 'at-rule' and rule.at_keyword == 'page':
            tokens = remove_whitespace(rule.prelude)
            types = {
                'side': None,
                'blank': False,
                'first': False,
                'name': None
            }
            # TODO: Specificity is probably wrong, should clean and test that.
            if not tokens:
                specificity = (0, 0, 0)
            elif (len(tokens) == 2 and tokens[0].type == 'literal'
                  and tokens[0].value == ':' and tokens[1].type == 'ident'):
                pseudo_class = tokens[1].lower_value
                if pseudo_class in ('left', 'right'):
                    types['side'] = pseudo_class
                    specificity = (0, 0, 1)
                elif pseudo_class in ('blank', 'first'):
                    types[pseudo_class] = True
                    specificity = (0, 1, 0)
                else:
                    LOGGER.warning(
                        'Unknown @page pseudo-class "%s", '
                        'the whole @page rule was ignored '
                        'at %s:%s.', pseudo_class,
                        rule.source_line, rule.source_column)
                    continue
            elif len(tokens) == 1 and tokens[0].type == 'ident':
                types['name'] = tokens[0].value
                specificity = (1, 0, 0)
            else:
                LOGGER.warning(
                    'Unsupported @page selector "%s", '
                    'the whole @page rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude),
                    rule.source_line, rule.source_column)
                continue
            page_type = PageType(**types)
            # Use a double lambda to have a closure that holds page_types
            match = (lambda page_type: lambda page_names: list(
                matching_page_types(page_type, names=page_names)))(page_type)
            content = tinycss2.parse_declaration_list(rule.content)
            declarations = list(preprocess_declarations(base_url, content))

            if declarations:
                selector_list = [(specificity, None, match)]
                page_rules.append((rule, selector_list, declarations))

            # Nested at-rules with a block are the page margin rules; one
            # without a block has content None and cannot be parsed.
            for margin_rule in content:
                if margin_rule.type != 'at-rule' or (
                        margin_rule.content is None):
                    continue
                declarations = list(
                    preprocess_declarations(
                        base_url,
                        tinycss2.parse_declaration_list(margin_rule.content)))
                if declarations:
                    selector_list = [(specificity,
                                      '@' + margin_rule.at_keyword, match)]
                    page_rules.append(
                        (margin_rule, selector_list, declarations))

        elif rule.type == 'at-rule' and rule.at_keyword == 'font-face':
            content = tinycss2.parse_declaration_list(rule.content)
            rule_descriptors = dict(preprocess_descriptors(base_url, content))
            for key in ('src', 'font_family'):
                if key not in rule_descriptors:
                    LOGGER.warning(
                        "Missing %s descriptor in '@font-face' rule at %s:%s",
                        key.replace('_', '-'),
                        rule.source_line, rule.source_column)
                    break
            else:
                # Reached only when both required descriptors are present.
                if font_config is not None:
                    font_filename = font_config.add_font_face(
                        rule_descriptors, url_fetcher)
                    if font_filename:
                        fonts.append(font_filename)