def parse_stylesheets(tree, url, session=None):
    """Find and parse the stylesheets in ``tree``.

    Return two :class:`cssselect2.Matcher` objects,
    for normal and !important declarations.

    ``url`` and ``session`` are forwarded unchanged to
    ``find_stylesheets_rules``.

    A rule whose selector list cannot be compiled (invalid or unsupported
    selector) is skipped instead of aborting the whole parse, so the
    remaining rules of the stylesheet still apply.

    """
    normal_matcher = cssselect2.Matcher()
    important_matcher = cssselect2.Matcher()

    for stylesheet in find_stylesheets(tree):
        rules = find_stylesheets_rules(
            tree, stylesheet, url, session=session)
        for rule in rules:
            normal_declarations, important_declarations = (
                parse_declarations(rule.content))
            try:
                selectors = cssselect2.compile_selector_list(rule.prelude)
            except cssselect2.SelectorError:
                # CSS drops a rule whose selector is invalid; one bad
                # selector must not discard every other rule.
                continue
            for selector in selectors:
                # Pseudo-element selectors and selectors that can never
                # match are not registered in either matcher.
                if selector.pseudo_element is not None:
                    continue
                if selector.never_matches:
                    continue
                if normal_declarations:
                    normal_matcher.add_selector(
                        selector, normal_declarations)
                if important_declarations:
                    important_matcher.add_selector(
                        selector, important_declarations)

    return normal_matcher, important_matcher
def parse_stylesheets(tree, url):
    """Find stylesheets and return rule matchers in given tree.

    Every SVG ``<style>`` element of ``tree`` whose ``type`` is (or defaults
    to) ``text/css`` and that has text content is parsed with tinycss2.

    Return a ``(normal_matcher, important_matcher)`` tuple of
    :class:`cssselect2.Matcher` objects, holding the normal and the
    !important declarations respectively.

    A rule whose selector list cannot be compiled (invalid or unsupported
    selector) is skipped instead of aborting the whole parse, so the
    remaining rules still apply.

    """
    normal_matcher = cssselect2.Matcher()
    important_matcher = cssselect2.Matcher()

    # Find stylesheets
    # TODO: support contentStyleType on <svg>
    stylesheets = []
    for element in tree.etree_element.iter():
        # http://www.w3.org/TR/SVG/styling.html#StyleElement
        if (element.tag == '{http://www.w3.org/2000/svg}style'
                and element.get('type', 'text/css') == 'text/css'
                and element.text):
            # TODO: pass href for relative URLs
            # TODO: support media types
            # TODO: what if <style> has children elements?
            stylesheets.append(tinycss2.parse_stylesheet(
                element.text, skip_comments=True, skip_whitespace=True))

    # Parse rules and fill matchers
    for stylesheet in stylesheets:
        for rule in find_stylesheets_rules(tree, stylesheet, url):
            normal_declarations, important_declarations = (
                parse_declarations(rule.content))
            try:
                selectors = cssselect2.compile_selector_list(rule.prelude)
            except cssselect2.SelectorError:
                # CSS drops a rule whose selector is invalid; one bad
                # selector must not discard every other rule.
                continue
            for selector in selectors:
                # Pseudo-element selectors and selectors that can never
                # match are not registered in either matcher.
                if (selector.pseudo_element is None
                        and not selector.never_matches):
                    if normal_declarations:
                        normal_matcher.add_selector(
                            selector, normal_declarations)
                    if important_declarations:
                        important_matcher.add_selector(
                            selector, important_declarations)

    return normal_matcher, important_matcher
def __init__(self, guess=None, filename=None, url=None, file_obj=None,
             string=None, encoding=None, base_url=None,
             url_fetcher=default_url_fetcher, _check_mime_type=False,
             media_type='print', font_config=None, counter_style=None,
             matcher=None, page_rules=None):
    """Fetch a CSS source, parse it and preprocess the stylesheet.

    The source is resolved by ``_select_source`` from ``guess``,
    ``filename``, ``url``, ``file_obj`` or ``string``. Text strings are
    parsed directly; anything else is parsed as bytes, using ``encoding``
    as the environment encoding and the protocol encoding reported by
    ``_select_source``.

    After parsing, ``base_url``, ``matcher``, ``page_rules`` and ``fonts``
    are stored on the instance and handed to ``preprocess_stylesheet``
    together with the parsed stylesheet. A new ``cssselect2.Matcher`` and
    a new list are used when ``matcher`` / ``page_rules`` are not given.

    """
    source_label = (
        filename or url or getattr(file_obj, 'name', 'CSS string'))
    PROGRESS_LOGGER.info(
        'Step 2 - Fetching and parsing CSS - %s', source_label)

    selected = _select_source(
        guess, filename, url, file_obj, string,
        base_url=base_url, url_fetcher=url_fetcher,
        check_css_mime_type=_check_mime_type)
    # ``base_url`` is rebound here to the value yielded by _select_source.
    with selected as (source_type, source, base_url, protocol_encoding):
        is_text = source_type == 'string' and not isinstance(source, bytes)
        if is_text:
            # unicode, no encoding
            stylesheet = tinycss2.parse_stylesheet(source)
        else:
            raw = source.read() if source_type == 'file_obj' else source
            # The detected encoding is returned too but is not needed here.
            stylesheet, _ = tinycss2.parse_stylesheet_bytes(
                raw, environment_encoding=encoding,
                protocol_encoding=protocol_encoding)

    self.base_url = base_url
    self.matcher = matcher or cssselect2.Matcher()
    if page_rules is None:
        self.page_rules = []
    else:
        self.page_rules = page_rules
    self.fonts = []

    preprocess_stylesheet(
        media_type, base_url, stylesheet, url_fetcher, self.matcher,
        self.page_rules, self.fonts, font_config, counter_style)
def select(rule: str, element: et.Element) -> 'PQuery':
    """Return a ``PQuery`` of the elements selected by ``rule``.

    ``rule`` is a CSS selector string. ``element`` is wrapped as an HTML
    root and every node yielded by the wrapper's ``iter_subtree()`` is
    tested against the compiled selectors; the underlying etree elements
    of the nodes that match are collected, in iteration order.

    """
    # Appending an empty declaration block lets tinycss2 parse the bare
    # selector as a stylesheet rule whose prelude is the selector itself.
    parsed_rules = tinycss2.parse_stylesheet(
        rule + ' {}', skip_whitespace=True)

    matcher = cssselect2.Matcher()
    for parsed_rule in parsed_rules:
        for selector in cssselect2.compile_selector_list(
                parsed_rule.prelude):
            # No payload is needed: only whether a node matches matters.
            matcher.add_selector(selector, None)

    root = cssselect2.ElementWrapper.from_html_root(element)
    matching_elements: List[et.Element] = [
        node.etree_element
        for node in root.iter_subtree()
        if matcher.match(node)
    ]
    return PQuery(matching_elements)
from xml.etree import ElementTree import cssselect2 import tinycss2 # Parse CSS and add rules to the matcher matcher = cssselect2.Matcher() rules = tinycss2.parse_stylesheet(''' body { font-size: 2em } body p { background: red } p { color: blue } ''', skip_whitespace=True) for rule in rules: selectors = cssselect2.compile_selector_list(rule.prelude) selector_string = tinycss2.serialize(rule.prelude) content_string = tinycss2.serialize(rule.content) payload = (selector_string, content_string) for selector in selectors: matcher.add_selector(selector, payload) # Parse HTML and find CSS rules applying to each tag html_tree = ElementTree.fromstring(''' <html> <body> <p>Test</p> </body>