def get_links_from_css(self, style_text, item):
    """Build download requests for every URL referenced by a stylesheet.

    The URLs found by ``cssutils.getUrls`` in ``style_text`` are resolved
    against ``item['response'].url``.  Each occurrence of the original
    reference inside ``item['content']`` is replaced by the resolved
    absolute URL, and the rewritten text is stored back into
    ``item['content']``.

    :param style_text: CSS source to scan for URL references.
    :param item: mapping that provides ``'response'`` (an object with a
        ``url`` attribute) and ``'content'`` (the text to rewrite).
    :return: list of ``Request`` objects, one per distinct referenced URL.
    """
    # Function-scope import keeps the block self-contained on both
    # Python 3 and Python 2.
    try:
        from urllib.parse import urljoin
    except ImportError:  # Python 2
        from urlparse import urljoin

    response = item['response']
    sheet = CSSStyleSheet()
    sheet.cssText = style_text

    requests = []
    item_content = item['content']
    seen = set()
    for url in cssutils.getUrls(sheet):
        # Skip empty references, and process each distinct reference once:
        # a second str.replace() pass for the same URL would rewrite the
        # relative part that is now embedded in the absolute URL.
        if not url or url in seen:
            continue
        seen.add(url)

        # urljoin keeps the scheme of the response (http or https), leaves
        # already-absolute references untouched and resolves both
        # root-relative ("/x") and document-relative ("x") references.
        request_url = urljoin(response.url, url)
        item_content = item_content.replace(url, request_url)
        requests.append(Request(request_url))

    item['content'] = item_content
    return requests
def _concatenate_sheets(self):
    """Rebuild the merged stylesheet cache when it is stale or missing.

    Does nothing unless ``self.dirty`` is truthy or no cached stylesheet
    exists yet.  Otherwise every entry of ``self.sheets`` is parsed, either
    from its inline ``'text'``, from ``local_loader[uri]``, or by fetching
    ``'absolute_url'``.  All rules are added to one new ``CSSStyleSheet``,
    and whatever ``_get_rule_uri_properties`` yields for each rule is
    collected.  The results are stored on ``self._cached_stylesheet`` and
    ``self._uri_properties`` and ``self.dirty`` is reset.
    """
    if not self.dirty and self._cached_stylesheet is not None:
        return

    merged = CSSStyleSheet()
    collected = []
    for entry in self.sheets:
        local_loader = entry.get('local_loader')
        text = entry.get('text')
        uri = entry.get('uri')
        absolute_url = entry.get('absolute_url')

        # No inline text: try the local loader before going to the network.
        if text is None and local_loader and uri:
            text = local_loader[uri]

        if not text:
            sheet = cssutils.parseUrl(href=absolute_url)
        else:
            sheet = CSSParser().parseString(text, href=absolute_url)

        for rule in sheet:
            merged.add(rule)
            collected.extend(_get_rule_uri_properties(rule))

    self._uri_properties = collected
    self._cached_stylesheet = merged
    self.dirty = False
def parse_css_stylesheet(content):
    """Parse ``content`` as a stylesheet and process each of its rules.

    If assigning ``content`` to the sheet raises, the raw text is passed to
    ``parser.process_content`` with ``contexts.CSS_UNKNOWN``; the rules the
    sheet holds afterwards are still walked.  The wall-clock time spent in
    this call is added to ``library.css_us``.
    """
    from datetime import datetime

    started_at = datetime.now()

    stylesheet = CSSStyleSheet()
    try:
        stylesheet.cssText = content
    except Exception:
        # Parsing failed
        parser.process_content(content, contexts.CSS_UNKNOWN)

    for css_rule in stylesheet.cssRules:
        parse_css_rule(css_rule)

    elapsed = datetime.now() - started_at
    library.css_us += elapsed
def __init__(self, property_parser=None):
    """Build CSS rules for every class in the parser's ``class_set``.

    For each class the property name, encoded value, priority and value are
    obtained from ``property_parser``.  Valid properties become
    ``CSSStyleRule`` objects collected in ``self.css_rules``.  Invalid
    classes are removed from ``class_set`` and recorded, together with the
    reason, in ``removed_class_set``.  Finally the stylesheet is built from
    the collected rules.

    :param property_parser: parser exposing ``class_set``,
        ``removed_class_set`` and the ``get_property_*`` helpers.  When
        omitted, a fresh ``ClassPropertyParser()`` is created for this
        instance.
    """
    # A default written as ``ClassPropertyParser()`` in the signature is
    # created once and shared by every builder; this constructor mutates
    # the parser's sets, so each instance needs its own.
    if property_parser is None:
        property_parser = ClassPropertyParser()

    message = 'CSSBuilder Running...'
    print(message)
    logging.debug(msg=message)

    self.property_parser = property_parser
    self.css_rules = set()
    self.css_stylesheet = CSSStyleSheet()

    # (css_class, reason) pairs; removal is deferred so class_set is not
    # modified while it is being iterated.
    invalid = []

    for css_class in self.property_parser.class_set:
        # 'name' can be an empty string '' if css_class does not match any
        # patterns in the property_alias_dict.
        name = self.property_parser.get_property_name(css_class=css_class)

        try:
            encoded_property_value = self.property_parser.get_encoded_property_value(
                property_name=name,
                css_class=css_class
            )
        except ValueError:
            invalid.append((css_class, ' (property_name not found in property_alias_dict.)'))
            continue

        priority = self.property_parser.get_property_priority(css_class=css_class)
        value = self.property_parser.get_property_value(
            property_name=name,
            encoded_property_value=encoded_property_value
        )

        # Build CSS Property AND add to css_rules OR mark css_class invalid.
        try:
            css_property = Property(name=name, value=value, priority=priority)
            if css_property.valid:
                selector = self.build_selector(str(css_class))
                css_rule = CSSStyleRule(selectorText=selector.selectorText, style=css_property.cssText)
                self.css_rules.add(css_rule)
            else:
                invalid.append((css_class, ' (cssutils invalid property value: ' + value + ')'))
        # This exception can't be tested as clean_class_set() and
        # get_property_value() prevent it. (Triple Redundant)
        except SyntaxErr:  # Special Case - Not Tested
            invalid.append((css_class, ' (cssutils SyntaxErr invalid property value: ' + value + ')'))

    # Clean out invalid CSS classes.
    for invalid_css_class, reason in invalid:
        self.property_parser.class_set.remove(invalid_css_class)
        self.property_parser.removed_class_set.add(invalid_css_class + reason)

    self.build_stylesheet()
def get_media_requests(self, item, info):
    """Return one ``Request`` per URL referenced in ``item['content']``.

    The content is loaded into a ``CSSStyleSheet`` and the URLs reported by
    ``cssutils.getUrls`` are wrapped, unchanged, in ``Request`` objects.
    ``info`` is accepted but not used here.
    """
    stylesheet = CSSStyleSheet()
    stylesheet.cssText = item['content']

    requests = []
    for url in cssutils.getUrls(stylesheet):
        requests.append(Request(url))
    return requests
def getView(self, document, sheet, media='all', name=None, styleCallback=None):
    """Compute the effective style declaration for each matched element.

    document
        a DOM document, currently an lxml HTML document
    sheet
        a CSS StyleSheet object, currently cssutils sheet
    media: optional
        TODO: view for which media it should be (currently unused)
    name: optional
        TODO: names of sheets only (currently unused)
    styleCallback: optional
        should return css.CSSStyleDeclaration of inline styles, for html
        a style declaration for ``element@style``. Gets one parameter
        ``element`` which is the relevant DOMElement. Defaults to
        ``self.styleattribute``.

    Rules whose selector cannot be compiled by ``CSSSelector`` are not
    merged; they are written into a ``<style>`` element inserted at the
    start of ``<body>`` (or of ``document`` when there is no ``<body>``).

    returns style view
        a dict of {DOMElement: css.CSSStyleDeclaration} for html
    """
    styleCallback = styleCallback or self.styleattribute

    _unmergable_rules = CSSStyleSheet()

    view = {}
    specificities = {}  # needed temporarily

    # TODO: filter rules simpler?, add @media
    rules = (rule for rule in sheet if rule.type == rule.STYLE_RULE)
    for rule in rules:
        for selector in rule.selectorList:
            self.log(0, 'SELECTOR', selector.selectorText)
            # TODO: make this a callback to be able to use other stuff than lxml
            try:
                cssselector = CSSSelector(selector.selectorText)
            except (ExpressionError, NotImplementedError):
                # Selector cannot be evaluated against the tree: keep the
                # rule so it can be emitted verbatim in a <style> element.
                _unmergable_rules.add(CSSStyleRule(selectorText=selector.selectorText,
                                                   style=rule.style))
                continue

            matching = cssselector.evaluate(document)

            for element in matching:
                if element.tag in self.NONVISUAL_TAGS:
                    continue

                # add styles for all matching DOM elements
                self.log(1, 'ELEMENT', id(element), element.text)

                if element not in view:
                    # add initial empty style declaration
                    view[element] = CSSStyleDeclaration()
                    specificities[element] = {}

                    # and add inline @style if present
                    inlinestyle = styleCallback(element)
                    if inlinestyle:
                        for p in inlinestyle:
                            # set inline style specificity
                            view[element].setProperty(p)
                            specificities[element][p.name] = (1, 0, 0, 0)

                for p in rule.style:
                    # update style declaration
                    if p not in view[element]:
                        # setProperty needs a new Property object and
                        # MUST NOT reuse the existing Property
                        # which would be the same for all elements!
                        # see Issue #23
                        view[element].setProperty(p.name, p.value, p.priority)
                        specificities[element][p.name] = selector.specificity
                        self.log(2, view[element].getProperty('color'))
                    else:
                        self.log(2, view[element].getProperty('color'))
                        sameprio = (p.priority ==
                                    view[element].getPropertyPriority(p.name))
                        if not sameprio and bool(p.priority) or (
                                sameprio and selector.specificity >=
                                specificities[element][p.name]):
                            # later, more specific or higher prio
                            view[element].setProperty(p.name, p.value, p.priority)
                            # Remember the specificity of the declaration
                            # that now wins; otherwise a later, less
                            # specific rule would be compared against the
                            # stale value and could override it.
                            specificities[element][p.name] = selector.specificity

    _unmergable_css = _unmergable_rules.cssText
    if _unmergable_css:
        style_element = etree.Element('style')
        style_element.text = to_unicode(_unmergable_css, 'utf-8')
        # Compare against None explicitly: an lxml element without children
        # is falsy, so ``find('body') or document`` would skip an empty body.
        body = document.find('body')
        if body is None:
            body = document
        body.insert(0, style_element)  # add <style> right into body

    return view
def __init__(self, base_url=None, css=None):
    """Create an empty stylesheet and optionally load initial CSS.

    :param base_url: stored on the instance and passed as ``href`` to the
        new ``CSSStyleSheet``.
    :param css: when truthy, forwarded to ``self.add_css``.
    """
    self.stylesheet = CSSStyleSheet(href=base_url)
    self.base_url = base_url
    if not css:
        return
    self.add_css(css)
class CSSInliner:
    """Merge stylesheet rules into the ``style`` attributes of an HTML tree.

    Rules are collected with ``add_css`` and applied with ``transform``.
    Rules whose selectors cannot be evaluated are emitted in a ``<style>``
    element instead of being inlined.
    """

    # Elements carrying one of these tags never receive an inline style.
    NONVISUAL_TAGS = ['html', 'head', 'title', 'meta', 'link', 'script']
    # When true, ``log`` prints its messages.
    DEBUG = False

    def __init__(self, base_url=None, css=None):
        """Create an empty stylesheet and optionally load initial CSS.

        :param base_url: stored on the instance and passed as ``href`` to
            the new ``CSSStyleSheet``.
        :param css: when truthy, forwarded to ``add_css``.
        """
        self.stylesheet = CSSStyleSheet(href=base_url)
        self.base_url = base_url
        if css:
            self.add_css(css)

    def add_css(self, css, href=None):
        """Add every rule of ``css`` to the inliner's stylesheet.

        :param css: CSS text, or an already parsed, iterable sheet.
        :param href: passed to the parser when ``css`` is text.
        """
        if isinstance(css, string_types):
            # Parse the text into a stylesheet.
            css = CSSParser().parseString(css, href=href)
        for rule in css:
            self.stylesheet.add(rule)

    def log(self, level, *msg):
        """Print ``msg`` indented by ``level`` when ``DEBUG`` is set."""
        if self.DEBUG:
            print(('%s- %s' % (level * '\t ',
                               ' '.join((to_unicode(m or '') for m in msg)))))

    def styleattribute(self, element):
        """Return css.CSSStyleDeclaration of inline styles, for html: @style.

        Returns ``None`` when the element has no ``style`` attribute or when
        the attribute cannot be parsed (the failure is logged).
        """
        cssText = element.get('style')
        if not cssText:
            return None
        try:
            return CSSStyleDeclaration(cssText=cssText)
        except Exception:
            # Sometimes here's error like "COLOR: ;"
            logging.exception('Exception in styleattribute %s', cssText)
            return None

    def getView(self, document, sheet, media='all', name=None, styleCallback=None):
        """Compute the effective style declaration for each matched element.

        document
            a DOM document, currently an lxml HTML document
        sheet
            a CSS StyleSheet object, currently cssutils sheet
        media: optional
            TODO: view for which media it should be (currently unused)
        name: optional
            TODO: names of sheets only (currently unused)
        styleCallback: optional
            should return css.CSSStyleDeclaration of inline styles, for html
            a style declaration for ``element@style``. Gets one parameter
            ``element`` which is the relevant DOMElement. Defaults to
            ``self.styleattribute``.

        Rules whose selector cannot be compiled by ``CSSSelector`` are not
        merged; they are written into a ``<style>`` element inserted at the
        start of ``<body>`` (or of ``document`` when there is no ``<body>``).

        returns style view
            a dict of {DOMElement: css.CSSStyleDeclaration} for html
        """
        styleCallback = styleCallback or self.styleattribute

        _unmergable_rules = CSSStyleSheet()

        view = {}
        specificities = {}  # needed temporarily

        # TODO: filter rules simpler?, add @media
        rules = (rule for rule in sheet if rule.type == rule.STYLE_RULE)
        for rule in rules:
            for selector in rule.selectorList:
                self.log(0, 'SELECTOR', selector.selectorText)
                # TODO: make this a callback to be able to use other stuff than lxml
                try:
                    cssselector = CSSSelector(selector.selectorText)
                except (ExpressionError, NotImplementedError):
                    # Selector cannot be evaluated against the tree: keep
                    # the rule so it can be emitted in a <style> element.
                    _unmergable_rules.add(CSSStyleRule(selectorText=selector.selectorText,
                                                       style=rule.style))
                    continue

                matching = cssselector.evaluate(document)

                for element in matching:
                    if element.tag in self.NONVISUAL_TAGS:
                        continue

                    # add styles for all matching DOM elements
                    self.log(1, 'ELEMENT', id(element), element.text)

                    if element not in view:
                        # add initial empty style declaration
                        view[element] = CSSStyleDeclaration()
                        specificities[element] = {}

                        # and add inline @style if present
                        inlinestyle = styleCallback(element)
                        if inlinestyle:
                            for p in inlinestyle:
                                # set inline style specificity
                                view[element].setProperty(p)
                                specificities[element][p.name] = (1, 0, 0, 0)

                    for p in rule.style:
                        # update style declaration
                        if p not in view[element]:
                            # setProperty needs a new Property object and
                            # MUST NOT reuse the existing Property
                            # which would be the same for all elements!
                            # see Issue #23
                            view[element].setProperty(p.name, p.value, p.priority)
                            specificities[element][p.name] = selector.specificity
                            self.log(2, view[element].getProperty('color'))
                        else:
                            self.log(2, view[element].getProperty('color'))
                            sameprio = (p.priority ==
                                        view[element].getPropertyPriority(p.name))
                            if not sameprio and bool(p.priority) or (
                                    sameprio and selector.specificity >=
                                    specificities[element][p.name]):
                                # later, more specific or higher prio
                                view[element].setProperty(p.name, p.value, p.priority)
                                # Remember the specificity of the winning
                                # declaration; otherwise a later, less
                                # specific rule would be compared against
                                # the stale value and could override it.
                                specificities[element][p.name] = selector.specificity

        _unmergable_css = _unmergable_rules.cssText
        if _unmergable_css:
            style_element = etree.Element('style')
            style_element.text = to_unicode(_unmergable_css, 'utf-8')
            # Compare against None explicitly: an lxml element without
            # children is falsy, so ``find('body') or document`` would skip
            # an empty body.
            body = document.find('body')
            if body is None:
                body = document
            body.insert(0, style_element)  # add <style> right into body

        return view

    def transform(self, html):
        """Inline the collected styles into ``html`` and return the tree.

        :param html: HTML text (parsed with ``etree.HTML``) or an already
            parsed document.
        :return: the document with ``style`` attributes set on every
            element present in the computed view.
        """
        if isinstance(html, string_types):
            html = etree.HTML(html, parser=etree.HTMLParser())

        view = self.getView(html, self.stylesheet)

        # - add style into @style attribute
        for element, style in view.items():
            element.set('style', style.getCssText(separator=''))

        return html

    transform_html = transform  # compatibility
class CSSBuilder(object):
    """Build CSS text from the classes held by a property parser.

    On construction every class in ``property_parser.class_set`` is turned
    into a ``CSSStyleRule`` with the selector ``'.' + css_class``.  Classes
    that cannot be converted are removed from ``class_set`` and recorded,
    with the reason appended, in ``removed_class_set``.
    """

    def __init__(self, property_parser=None):
        """Build the rules and the stylesheet.

        :param property_parser: parser exposing ``class_set``,
            ``removed_class_set`` and the ``get_property_*`` helpers.  When
            omitted, a fresh ``ClassPropertyParser()`` is created for this
            instance.
        """
        # A default written as ``ClassPropertyParser()`` in the signature is
        # created once and shared by every builder; this constructor mutates
        # the parser's sets, so each instance needs its own.
        if property_parser is None:
            property_parser = ClassPropertyParser()

        print(u'\nCSSBuilder Running...\n')
        self.property_parser = property_parser
        self.css_rules = set()
        self.css_stylesheet = CSSStyleSheet()

        # (css_class, reason) pairs; removal is deferred so class_set is not
        # modified while it is being iterated.
        invalid = []

        for css_class in self.property_parser.class_set:
            # 'name' can be an empty string '' if css_class does not match
            # any patterns in the property_alias_dict.
            name = self.property_parser.get_property_name(css_class=css_class)

            try:
                encoded_property_value = self.property_parser.get_encoded_property_value(
                    property_name=name,
                    css_class=css_class
                )
            except ValueError:
                invalid.append((css_class, ' (property_name not found in property_alias_dict.)'))
                continue

            priority = self.property_parser.get_property_priority(css_class=css_class)
            value = self.property_parser.get_property_value(
                property_name=name,
                encoded_property_value=encoded_property_value
            )

            # Build CSS Property AND add to css_rules OR mark css_class invalid.
            try:
                css_property = Property(name=name, value=value, priority=priority)
                if css_property.valid:
                    # Use a separate name for the selector: if CSSStyleRule
                    # raises SyntaxErr, the handler below must still see the
                    # class exactly as it is stored in class_set.
                    selector_text = '.' + css_class
                    css_rule = CSSStyleRule(selectorText=selector_text, style=css_property.cssText)
                    self.css_rules.add(css_rule)
                else:
                    invalid.append((css_class, ' (cssutils invalid property value: ' + value + ')'))
            # This exception can't be tested as clean_class_set() and
            # get_property_value() prevent it. (Triple Redundant)
            except SyntaxErr:  # Special Case - Not Tested
                invalid.append((css_class, ' (cssutils SyntaxErr invalid property value: ' + value + ')'))

        # Clean out invalid CSS classes.
        for invalid_css_class, reason in invalid:
            self.property_parser.class_set.remove(invalid_css_class)
            self.property_parser.removed_class_set.add(invalid_css_class + reason)

        self.build_stylesheet()

    def build_stylesheet(self):
        """ Builds the stylesheet by adding CSS rules to the CSS stylesheet.

        :return: None
        """
        for css_rule in self.css_rules:
            self.css_stylesheet.add(rule=css_rule)

    def get_css_text(self):
        """
        :return: str -- Returns CSS text.
        """
        return self.css_stylesheet.cssText
class CSSBuilder(object):
    """ Builds CSS text with the ``cssutils.css`` module.

    **Note:** Removes invalid classes. A class is invalid for one of the following reasons:

    - It is not valid CSS.
    - It does not contain a valid ``blowdrycss`` encoding.

    **Object initialization process:**

    - Build CSS property rules
    - Add to css_rules, OR remove invalid css_class from class_set.
    - Build a CSS stylesheet based on the CSS ``css_rules`` set.

    | **Parameters: property_parser** (*ClassPropertyParser object*) -- Contains a class property parser with a
      populated class_set.  When omitted, a fresh ``ClassPropertyParser()`` is created for this instance.

    | **Returns:** None

    """

    def __init__(self, property_parser=None):
        # A default written as ``ClassPropertyParser()`` in the signature is
        # created once and shared by every builder; this constructor mutates
        # the parser's sets, so each instance needs its own.
        if property_parser is None:
            property_parser = ClassPropertyParser()

        message = 'CSSBuilder Running...'
        print(message)
        logging.debug(msg=message)

        self.property_parser = property_parser
        self.css_rules = set()
        self.css_stylesheet = CSSStyleSheet()

        # (css_class, reason) pairs; removal is deferred so class_set is not
        # modified while it is being iterated.
        invalid = []

        for css_class in self.property_parser.class_set:
            # 'name' can be an empty string '' if css_class does not match
            # any patterns in the property_alias_dict.
            name = self.property_parser.get_property_name(css_class=css_class)

            try:
                encoded_property_value = self.property_parser.get_encoded_property_value(
                    property_name=name,
                    css_class=css_class
                )
            except ValueError:
                invalid.append((css_class, ' (property_name not found in property_alias_dict.)'))
                continue

            priority = self.property_parser.get_property_priority(css_class=css_class)
            value = self.property_parser.get_property_value(
                property_name=name,
                encoded_property_value=encoded_property_value
            )

            # Build CSS Property AND add to css_rules OR mark css_class invalid.
            try:
                css_property = Property(name=name, value=value, priority=priority)
                if css_property.valid:
                    selector = self.build_selector(str(css_class))
                    css_rule = CSSStyleRule(selectorText=selector.selectorText, style=css_property.cssText)
                    self.css_rules.add(css_rule)
                else:
                    invalid.append((css_class, ' (cssutils invalid property value: ' + value + ')'))
            # This exception can't be tested as clean_class_set() and
            # get_property_value() prevent it. (Triple Redundant)
            except SyntaxErr:  # Special Case - Not Tested
                invalid.append((css_class, ' (cssutils SyntaxErr invalid property value: ' + value + ')'))

        # Clean out invalid CSS classes.
        for invalid_css_class, reason in invalid:
            self.property_parser.class_set.remove(invalid_css_class)
            self.property_parser.removed_class_set.add(invalid_css_class + reason)

        self.build_stylesheet()

    def build_selector(self, css_class=''):
        """ Builds a CSS selector by prepending a ``'.'`` to ``css_class``, and appending an optional pseudo item.

        **Rules**

        - Always prepend a ``'.'`` to ``css_class``.

        - If a pseudo class is found append ``':' + pseudo_class`` to ``css_class``.

        - If a pseudo element is found append ``'::' + pseudo_element`` to ``css_class``.

        A pseudo class takes precedence when both are set on the parser.

        :type css_class: str

        :param css_class: This value may or may not be identical to the property_value.
        :return: *Selector* -- The selector object built from ``css_class`` with a '.' prepended and an optional
            pseudo item appended.

        """
        # Let the parser detect a pseudo class / element from the raw class name.
        self.property_parser.set_pseudo_class(css_class)
        self.property_parser.set_pseudo_element(css_class)

        selector_text = '.' + css_class
        if self.property_parser.pseudo_class:
            selector_text += ':' + self.property_parser.pseudo_class
        elif self.property_parser.pseudo_element:
            selector_text += '::' + self.property_parser.pseudo_element
        return Selector(selector_text)

    def build_stylesheet(self):
        """ Builds the stylesheet by adding CSS rules to the CSS stylesheet.

        :return: None
        """
        for css_rule in self.css_rules:
            self.css_stylesheet.add(rule=css_rule)

    def get_css_text(self):
        """
        :return: str -- Returns CSS text.
        """
        return self.css_stylesheet.cssText