示例#1
0
 def get_links_from_css(self, style_text, item):
     '''
         Extract the urls referenced by a piece of css text and return
         the requests needed to download those resources.

         Every distinct url found in ``style_text`` is resolved against
         the url of ``item['response']``; its occurrences in
         ``item['content']`` are rewritten to that absolute url so it
         can later be swapped for a local url.

         style_text
             css source to scan for ``url(...)`` / ``@import`` references
         item
             mapping with a ``'response'`` entry (object exposing ``url``)
             and a ``'content'`` entry (text); ``'content'`` is updated
             in place

         returns a list with one ``Request`` per distinct url
     '''
     import re
     try:
         from urllib.parse import urljoin
     except ImportError:  # Python 2
         from urlparse import urljoin

     response = item['response']
     sheet = CSSStyleSheet()
     sheet.cssText = style_text
     item_content = item['content']

     # urljoin copes with https pages, protocol-relative ("//host/x"),
     # root-relative, document-relative and already absolute urls, which
     # manual splitting on 'http://' and '/' did not.
     absolute_urls = {}
     requests = []
     for url in cssutils.getUrls(sheet):
         # Skip empty urls and urls already handled: rewriting the same
         # url twice would prefix the already absolute url once more.
         if not url or url in absolute_urls:
             continue
         absolute_urls[url] = urljoin(response.url, url)
         requests.append(Request(absolute_urls[url]))

     if absolute_urls:
         # Rewrite everything in one pass so freshly inserted absolute
         # urls are never rescanned; longest alternatives come first so a
         # short url cannot win over a longer one starting at the same spot.
         pattern = re.compile('|'.join(
             re.escape(url)
             for url in sorted(absolute_urls, key=len, reverse=True)))
         item_content = pattern.sub(
             lambda match: absolute_urls[match.group(0)], item_content)

     item['content'] = item_content
     return requests
示例#2
0
 def get_links_from_css(self, style_text, item):
     '''
         Extract the urls referenced by a piece of css text and return
         the requests needed to download those resources.

         Every distinct url found in ``style_text`` is resolved against
         the url of ``item['response']``; its occurrences in
         ``item['content']`` are rewritten to that absolute url so it
         can later be swapped for a local url.

         style_text
             css source to scan for ``url(...)`` / ``@import`` references
         item
             mapping with a ``'response'`` entry (object exposing ``url``)
             and a ``'content'`` entry (text); ``'content'`` is updated
             in place

         returns a list with one ``Request`` per distinct url
     '''
     import re
     try:
         from urllib.parse import urljoin
     except ImportError:  # Python 2
         from urlparse import urljoin

     response = item['response']
     sheet = CSSStyleSheet()
     sheet.cssText = style_text
     item_content = item['content']

     # urljoin copes with https pages, protocol-relative ("//host/x"),
     # root-relative, document-relative and already absolute urls, which
     # manual splitting on 'http://' and '/' did not.
     absolute_urls = {}
     requests = []
     for url in cssutils.getUrls(sheet):
         # Skip empty urls and urls already handled: rewriting the same
         # url twice would prefix the already absolute url once more.
         if not url or url in absolute_urls:
             continue
         absolute_urls[url] = urljoin(response.url, url)
         requests.append(Request(absolute_urls[url]))

     if absolute_urls:
         # Rewrite everything in one pass so freshly inserted absolute
         # urls are never rescanned; longest alternatives come first so a
         # short url cannot win over a longer one starting at the same spot.
         pattern = re.compile('|'.join(
             re.escape(url)
             for url in sorted(absolute_urls, key=len, reverse=True)))
         item_content = pattern.sub(
             lambda match: absolute_urls[match.group(0)], item_content)

     item['content'] = item_content
     return requests
示例#3
0
    def _concatenate_sheets(self):
        """Merge every entry of ``self.sheets`` into one cached stylesheet.

        Nothing happens while the cache is valid, i.e. ``self.dirty`` is
        false and ``self._cached_stylesheet`` is set. Otherwise each entry
        (a dict with optional ``text``, ``uri``, ``absolute_url`` and
        ``local_loader`` keys) is parsed and its rules are added to a new
        ``CSSStyleSheet``. The values yielded by
        ``_get_rule_uri_properties`` for each rule are collected in
        ``self._uri_properties``.

        Entries that provide neither css text nor an ``absolute_url``, and
        entries whose url yields no stylesheet, are skipped.
        """
        if not self.dirty and self._cached_stylesheet is not None:
            return

        combined = CSSStyleSheet()
        uri_properties = []

        for entry in self.sheets:
            local_loader = entry.get('local_loader')
            text = entry.get('text')
            uri = entry.get('uri')
            absolute_url = entry.get('absolute_url')

            # No inline text: try the local loader before going remote.
            if text is None and local_loader and uri:
                text = local_loader[uri]

            if text:
                sheet = CSSParser().parseString(text, href=absolute_url)
            elif absolute_url:
                sheet = cssutils.parseUrl(href=absolute_url)
            else:
                # Nothing to parse and no url to fetch it from.
                continue

            # cssutils.parseUrl gives back no sheet (None) when the url
            # cannot be read; iterating it would raise TypeError.
            if sheet is None:
                continue

            for rule in sheet:
                combined.add(rule)
                uri_properties.extend(_get_rule_uri_properties(rule))

        self._uri_properties = uri_properties
        self._cached_stylesheet = combined
        self.dirty = False
示例#4
0
def parse_css_stylesheet(content):
    """Parse ``content`` as a css stylesheet and process each of its rules.

    When assigning the text to the stylesheet raises, the raw content is
    handed to ``parser.process_content`` with ``contexts.CSS_UNKNOWN``.
    Whatever rules the stylesheet then holds are passed one by one to
    ``parse_css_rule``. The elapsed wall-clock time is added to
    ``library.css_us``.
    """
    from datetime import datetime

    started_at = datetime.now()
    stylesheet = CSSStyleSheet()
    try:
        stylesheet.cssText = content
    except Exception:
        # Parsing failed: fall back to treating the text as unknown css.
        parser.process_content(content, contexts.CSS_UNKNOWN)

    for css_rule in stylesheet.cssRules:
        parse_css_rule(css_rule)

    finished_at = datetime.now()
    elapsed = finished_at - started_at
    library.css_us += elapsed
示例#5
0
    def __init__(self, property_parser=None):
        """ Builds one CSS rule per valid class in ``property_parser.class_set``.

        Classes for which no encoded property value can be derived, or whose
        property cssutils reports as invalid, are removed from
        ``property_parser.class_set`` and recorded (class name plus reason)
        in ``property_parser.removed_class_set``. The stylesheet is then
        built from the collected rules.

        :param property_parser: ``ClassPropertyParser`` with a populated
            ``class_set``. A new ``ClassPropertyParser()`` is created when
            omitted.
        :return: None
        """
        # A parser instantiated in the signature would be created once and
        # then shared (and mutated below) by every CSSBuilder built without
        # an explicit parser.
        if property_parser is None:
            property_parser = ClassPropertyParser()

        message = 'CSSBuilder Running...'
        print(message)
        logging.debug(msg=message)
        self.property_parser = property_parser
        self.css_rules = set()
        self.css_stylesheet = CSSStyleSheet()

        # (css_class, reason) pairs. Removal is deferred until after the loop
        # because class_set must not change while it is being iterated.
        rejected = []
        for css_class in self.property_parser.class_set:
            name = self.property_parser.get_property_name(css_class=css_class)

            # 'name' can be an empty string '' if css_class does not match any pattern in the property_alias_dict.
            try:
                encoded_property_value = self.property_parser.get_encoded_property_value(
                    property_name=name,
                    css_class=css_class
                )
            except ValueError:
                rejected.append((css_class, ' (property_name not found in property_alias_dict.)'))
                continue

            priority = self.property_parser.get_property_priority(css_class=css_class)
            value = self.property_parser.get_property_value(
                property_name=name,
                encoded_property_value=encoded_property_value
            )
            # Build the CSS property and add it to css_rules, or mark the css_class as rejected.
            try:
                css_property = Property(name=name, value=value, priority=priority)
                if css_property.valid:
                    selector = self.build_selector(str(css_class))
                    css_rule = CSSStyleRule(selectorText=selector.selectorText, style=css_property.cssText)
                    self.css_rules.add(css_rule)
                else:
                    rejected.append((css_class, ' (cssutils invalid property value: ' + value + ')'))
            # This exception can't be tested as clean_class_set() and get_property_value() prevent it.(Triple Redundant)
            except SyntaxErr:   # Special Case - Not Tested
                rejected.append((css_class, ' (cssutils SyntaxErr invalid property value: ' + value + ')'))

        # Clean out invalid CSS classes.
        for invalid_css_class, reason in rejected:
            self.property_parser.class_set.remove(invalid_css_class)
            self.property_parser.removed_class_set.add(invalid_css_class + reason)

        self.build_stylesheet()
示例#6
0
 def get_media_requests(self, item, info):
     '''
         Build one ``Request`` for every url that cssutils finds in the
         css text stored under ``item['content']``.

         ``info`` is accepted but not used.
     '''
     stylesheet = CSSStyleSheet()
     stylesheet.cssText = item['content']
     pending = []
     for found_url in cssutils.getUrls(stylesheet):
         pending.append(Request(found_url))
     return pending
示例#7
0
    def getView(self, document, sheet, media='all', name=None, styleCallback=None):
        """
        Compute the style declaration that applies to each matched element.

        document
            a DOM document, currently an lxml HTML document
        sheet
            a CSS StyleSheet object, currently cssutils sheet
        media: optional
            TODO: view for which media it should be (currently unused)
        name: optional
            TODO: names of sheets only (currently unused)
        styleCallback: optional
            should return css.CSSStyleDeclaration of inline styles, for html
            a style declaration for ``element@style``. Gets one parameter
            ``element`` which is the relevant DOMElement. Defaults to
            ``self.styleattribute``.

        Rules whose selector cannot be compiled by ``CSSSelector`` are not
        merged; they are emitted as a ``<style>`` element inserted as the
        first child of ``<body>`` (or of ``document`` when there is no
        ``<body>``).

        returns style view
            a dict of {DOMElement: css.CSSStyleDeclaration} for html
        """

        styleCallback = styleCallback or self.styleattribute

        _unmergable_rules = CSSStyleSheet()

        view = {}
        specificities = {}  # needed temporarily

        # TODO: filter rules simpler?, add @media
        rules = (rule for rule in sheet if rule.type == rule.STYLE_RULE)
        for rule in rules:
            for selector in rule.selectorList:
                self.log(0, 'SELECTOR', selector.selectorText)
                # TODO: make this a callback to be able to use other stuff than lxml
                try:
                    cssselector = CSSSelector(selector.selectorText)
                except (ExpressionError, NotImplementedError):
                    # Selector cannot be evaluated: keep the rule for the
                    # <style> fallback built at the end.
                    _unmergable_rules.add(CSSStyleRule(selectorText=selector.selectorText,
                                                       style=rule.style))
                    continue

                matching = cssselector.evaluate(document)

                for element in matching:

                    if element.tag in self.NONVISUAL_TAGS:
                        continue

                    # add styles for all matching DOM elements
                    self.log(1, 'ELEMENT', id(element), element.text)

                    if element not in view:
                        # add initial empty style declaration
                        view[element] = CSSStyleDeclaration()
                        specificities[element] = {}

                        # and add inline @style if present
                        inlinestyle = styleCallback(element)
                        if inlinestyle:
                            for p in inlinestyle:
                                # set inline style specificity
                                view[element].setProperty(p)
                                specificities[element][p.name] = (1, 0, 0, 0)

                    for p in rule.style:
                        # update style declaration
                        if p not in view[element]:
                            # setProperty needs a new Property object and
                            # MUST NOT reuse the existing Property
                            # which would be the same for all elements!
                            # see Issue #23
                            view[element].setProperty(p.name, p.value, p.priority)
                            specificities[element][p.name] = selector.specificity
                            self.log(2, view[element].getProperty('color'))

                        else:
                            self.log(2, view[element].getProperty('color'))
                            sameprio = (p.priority ==
                                        view[element].getPropertyPriority(p.name))
                            if not sameprio and bool(p.priority) or (
                               sameprio and selector.specificity >=
                                    specificities[element][p.name]):
                                # later, more specific or higher prio
                                view[element].setProperty(p.name, p.value, p.priority)

        _unmergable_css = _unmergable_rules.cssText
        if _unmergable_css:
            style_element = etree.Element('style')
            style_element.text = to_unicode(_unmergable_css, 'utf-8')
            # An lxml element without children is falsy, so "find(...) or
            # document" would bypass an existing but empty <body>.
            body = document.find('body')
            if body is None:
                body = document
            body.insert(0, style_element)  # add <style> right into body

        return view
示例#8
0
    def __init__(self, base_url=None, css=None):
        """Create the inliner with an empty stylesheet whose href is ``base_url``.

        base_url: optional
            stored on the instance and used as ``href`` of the stylesheet
        css: optional
            when truthy, it is passed to ``self.add_css`` right away
        """
        self.stylesheet = CSSStyleSheet(href=base_url)
        self.base_url = base_url
        if not css:
            return
        self.add_css(css)
示例#9
0
class CSSInliner:
    """Inline the rules of a cssutils stylesheet into ``style`` attributes.

    Rules are collected with ``add_css``; ``transform`` applies them to an
    lxml HTML tree and writes the resulting declarations into each matched
    element's ``style`` attribute.
    """

    # Elements with these tags never receive an inline style.
    NONVISUAL_TAGS = ['html', 'head', 'title', 'meta', 'link', 'script']

    # When true, ``log`` prints its messages.
    DEBUG = False

    def __init__(self, base_url=None, css=None):
        """Create the inliner with an empty stylesheet whose href is ``base_url``.

        base_url: optional
            stored on the instance and used as ``href`` of the stylesheet
        css: optional
            when truthy, it is passed to ``add_css`` right away
        """
        self.stylesheet = CSSStyleSheet(href=base_url)
        self.base_url = base_url
        if css:
            self.add_css(css)

    def add_css(self, css, href=None):
        """Append every rule of ``css`` to the inliner's stylesheet.

        css
            css source text, or an iterable of rules (e.g. a parsed sheet)
        href: optional
            passed to the parser when ``css`` is text
        """
        if isinstance(css, string_types):
            css = CSSParser().parseString(css, href=href)  # parse the source text

        # NOTE: rewriting of 'background' urls relative to the sheet href was
        # sketched here at some point but is not implemented.
        for rule in css:
            self.stylesheet.add(rule)

    def log(self, level, *msg):
        """Print ``msg`` indented by ``level`` when ``DEBUG`` is enabled."""
        if self.DEBUG:
            print(('%s- %s' % (level * '\t ', ' '.join((to_unicode(m or '') for m in msg)))))

    def styleattribute(self, element):
        "returns css.CSSStyleDeclaration of inline styles, for html: @style"
        cssText = element.get('style')
        if not cssText:
            return None
        try:
            return CSSStyleDeclaration(cssText=cssText)
        except Exception:
            # Sometimes here's error like "COLOR: ;"
            logging.exception('Exception in styleattribute %s', cssText)
            return None

    def getView(self, document, sheet, media='all', name=None, styleCallback=None):
        """
        Compute the style declaration that applies to each matched element.

        document
            a DOM document, currently an lxml HTML document
        sheet
            a CSS StyleSheet object, currently cssutils sheet
        media: optional
            TODO: view for which media it should be (currently unused)
        name: optional
            TODO: names of sheets only (currently unused)
        styleCallback: optional
            should return css.CSSStyleDeclaration of inline styles, for html
            a style declaration for ``element@style``. Gets one parameter
            ``element`` which is the relevant DOMElement. Defaults to
            ``self.styleattribute``.

        Rules whose selector cannot be compiled by ``CSSSelector`` are not
        merged; they are emitted as a ``<style>`` element inserted as the
        first child of ``<body>`` (or of ``document`` when there is no
        ``<body>``).

        returns style view
            a dict of {DOMElement: css.CSSStyleDeclaration} for html
        """

        styleCallback = styleCallback or self.styleattribute

        _unmergable_rules = CSSStyleSheet()

        view = {}
        specificities = {}  # needed temporarily

        # TODO: filter rules simpler?, add @media
        rules = (rule for rule in sheet if rule.type == rule.STYLE_RULE)
        for rule in rules:
            for selector in rule.selectorList:
                self.log(0, 'SELECTOR', selector.selectorText)
                # TODO: make this a callback to be able to use other stuff than lxml
                try:
                    cssselector = CSSSelector(selector.selectorText)
                except (ExpressionError, NotImplementedError):
                    # Selector cannot be evaluated: keep the rule for the
                    # <style> fallback built at the end.
                    _unmergable_rules.add(CSSStyleRule(selectorText=selector.selectorText,
                                                       style=rule.style))
                    continue

                matching = cssselector.evaluate(document)

                for element in matching:

                    if element.tag in self.NONVISUAL_TAGS:
                        continue

                    # add styles for all matching DOM elements
                    self.log(1, 'ELEMENT', id(element), element.text)

                    if element not in view:
                        # add initial empty style declaration
                        view[element] = CSSStyleDeclaration()
                        specificities[element] = {}

                        # and add inline @style if present
                        inlinestyle = styleCallback(element)
                        if inlinestyle:
                            for p in inlinestyle:
                                # set inline style specificity
                                view[element].setProperty(p)
                                specificities[element][p.name] = (1, 0, 0, 0)

                    for p in rule.style:
                        # update style declaration
                        if p not in view[element]:
                            # setProperty needs a new Property object and
                            # MUST NOT reuse the existing Property
                            # which would be the same for all elements!
                            # see Issue #23
                            view[element].setProperty(p.name, p.value, p.priority)
                            specificities[element][p.name] = selector.specificity
                            self.log(2, view[element].getProperty('color'))

                        else:
                            self.log(2, view[element].getProperty('color'))
                            sameprio = (p.priority ==
                                        view[element].getPropertyPriority(p.name))
                            if not sameprio and bool(p.priority) or (
                               sameprio and selector.specificity >=
                                    specificities[element][p.name]):
                                # later, more specific or higher prio
                                view[element].setProperty(p.name, p.value, p.priority)

        _unmergable_css = _unmergable_rules.cssText
        if _unmergable_css:
            style_element = etree.Element('style')
            style_element.text = to_unicode(_unmergable_css, 'utf-8')
            # An lxml element without children is falsy, so "find(...) or
            # document" would bypass an existing but empty <body>.
            body = document.find('body')
            if body is None:
                body = document
            body.insert(0, style_element)  # add <style> right into body

        return view

    def transform(self, html):
        """Inline the collected stylesheet into ``html`` and return the tree.

        html
            html source text (parsed with ``etree.HTML``) or an already
            parsed tree, which is modified in place
        """
        if isinstance(html, string_types):
            html = etree.HTML(html, parser=etree.HTMLParser())

        view = self.getView(html, self.stylesheet)

        # - add style into @style attribute
        for element, style in view.items():
            element.set('style', style.getCssText(separator=''))

        return html

    transform_html = transform  # compatibility
示例#10
0
class CSSBuilder(object):
    """ Builds CSS text from the classes held by a ``ClassPropertyParser``.

    On initialization one CSS rule is built per valid class in
    ``property_parser.class_set``; invalid classes are removed from that set
    and recorded, with a reason, in ``property_parser.removed_class_set``.
    """
    def __init__(self, property_parser=None):
        """
        :param property_parser: ``ClassPropertyParser`` with a populated
            ``class_set``. A new ``ClassPropertyParser()`` is created when
            omitted.
        :return: None
        """
        # A parser instantiated in the signature would be created once and
        # then shared (and mutated below) by every CSSBuilder built without
        # an explicit parser.
        if property_parser is None:
            property_parser = ClassPropertyParser()

        print(u'\nCSSBuilder Running...\n')
        self.property_parser = property_parser
        self.css_rules = set()
        self.css_stylesheet = CSSStyleSheet()

        # (css_class, reason) pairs. Removal is deferred until after the loop
        # because class_set must not change while it is being iterated.
        rejected = []
        for css_class in self.property_parser.class_set:
            name = self.property_parser.get_property_name(css_class=css_class)

            # 'name' can be an empty string '' if css_class does not match any pattern in the property_alias_dict.
            try:
                encoded_property_value = self.property_parser.get_encoded_property_value(
                    property_name=name,
                    css_class=css_class
                )
            except ValueError:
                rejected.append((css_class, ' (property_name not found in property_alias_dict.)'))
                continue

            priority = self.property_parser.get_property_priority(css_class=css_class)
            value = self.property_parser.get_property_value(
                property_name=name,
                encoded_property_value=encoded_property_value
            )
            # Build the CSS property and add it to css_rules, or mark the css_class as rejected.
            try:
                css_property = Property(name=name, value=value, priority=priority)
                if css_property.valid:
                    # Keep css_class untouched: rebinding it to the dotted
                    # selector would make the SyntaxErr handler below record
                    # a name that is not in class_set.
                    selector_text = '.' + css_class
                    css_rule = CSSStyleRule(selectorText=selector_text, style=css_property.cssText)
                    self.css_rules.add(css_rule)
                else:
                    rejected.append((css_class, ' (cssutils invalid property value: ' + value + ')'))
            # This exception can't be tested as clean_class_set() and get_property_value() prevent it.(Triple Redundant)
            except SyntaxErr:   # Special Case - Not Tested
                rejected.append((css_class, ' (cssutils SyntaxErr invalid property value: ' + value + ')'))

        # Clean out invalid CSS classes.
        for invalid_css_class, reason in rejected:
            self.property_parser.class_set.remove(invalid_css_class)
            self.property_parser.removed_class_set.add(invalid_css_class + reason)

        self.build_stylesheet()

    def build_stylesheet(self):
        """ Builds the stylesheet by adding CSS rules to the CSS stylesheet.

        :return: None
        """
        for css_rule in self.css_rules:
            self.css_stylesheet.add(rule=css_rule)

    def get_css_text(self):
        """
        :return: The ``cssText`` of the built stylesheet.
        """
        return self.css_stylesheet.cssText
示例#11
0
class CSSBuilder(object):
    """ Builds CSS text with the ``cssutils.css`` module.

    **Note:** Removes invalid classes. A class is invalid for one of the following reasons:

    - It is not valid CSS.
    - It does not contain a valid ``blowdrycss`` encoding.

    **Object initialization process:**

    - Build CSS property rules
    - Add to css_rules, OR remove invalid css_class from class_set.
    - Build a CSS stylesheet based on the CSS ``css_rules`` set.

    | **Parameters: property_parser** (*ClassPropertyParser object*) -- Contains a class property parser with a
      populated class_set. A new ``ClassPropertyParser()`` is created when omitted.
    | **Returns:** None

    """
    def __init__(self, property_parser=None):
        # A parser instantiated in the signature would be created once and
        # then shared (and mutated below) by every CSSBuilder built without
        # an explicit parser.
        if property_parser is None:
            property_parser = ClassPropertyParser()

        message = 'CSSBuilder Running...'
        print(message)
        logging.debug(msg=message)
        self.property_parser = property_parser
        self.css_rules = set()
        self.css_stylesheet = CSSStyleSheet()

        # (css_class, reason) pairs. Removal is deferred until after the loop
        # because class_set must not change while it is being iterated.
        rejected = []
        for css_class in self.property_parser.class_set:
            name = self.property_parser.get_property_name(css_class=css_class)

            # 'name' can be an empty string '' if css_class does not match any pattern in the property_alias_dict.
            try:
                encoded_property_value = self.property_parser.get_encoded_property_value(
                    property_name=name,
                    css_class=css_class
                )
            except ValueError:
                rejected.append((css_class, ' (property_name not found in property_alias_dict.)'))
                continue

            priority = self.property_parser.get_property_priority(css_class=css_class)
            value = self.property_parser.get_property_value(
                property_name=name,
                encoded_property_value=encoded_property_value
            )
            # Build the CSS property and add it to css_rules, or mark the css_class as rejected.
            try:
                css_property = Property(name=name, value=value, priority=priority)
                if css_property.valid:
                    selector = self.build_selector(str(css_class))
                    css_rule = CSSStyleRule(selectorText=selector.selectorText, style=css_property.cssText)
                    self.css_rules.add(css_rule)
                else:
                    rejected.append((css_class, ' (cssutils invalid property value: ' + value + ')'))
            # This exception can't be tested as clean_class_set() and get_property_value() prevent it.(Triple Redundant)
            except SyntaxErr:   # Special Case - Not Tested
                rejected.append((css_class, ' (cssutils SyntaxErr invalid property value: ' + value + ')'))

        # Clean out invalid CSS classes.
        for invalid_css_class, reason in rejected:
            self.property_parser.class_set.remove(invalid_css_class)
            self.property_parser.removed_class_set.add(invalid_css_class + reason)

        self.build_stylesheet()

    def build_selector(self, css_class=''):
        """ Builds a CSS selector by prepending a ``'.'`` to ``css_class``, and appending an optional pseudo item.

        **Rules**

        - Always prepend a ``'.'`` to ``css_class``.

        - If a pseudo class is found append ``':' + pseudo_class`` to ``css_class``.

        - Otherwise, if a pseudo element is found append ``'::' + pseudo_element`` to ``css_class``.

        :type css_class: str

        :param css_class: This value may or may not be identical to the property_value.
        :return: *Selector* -- The selector with a '.' prepended and an optional pseudo item appended.

        """
        # Let the parser refresh its pseudo_class / pseudo_element attributes
        # for this css_class; they are read right below.
        self.property_parser.set_pseudo_class(css_class)
        self.property_parser.set_pseudo_element(css_class)

        css_class = '.' + css_class

        if self.property_parser.pseudo_class:
            return Selector(css_class + ':' + self.property_parser.pseudo_class)
        if self.property_parser.pseudo_element:
            return Selector(css_class + '::' + self.property_parser.pseudo_element)
        return Selector(css_class)

    def build_stylesheet(self):
        """ Builds the stylesheet by adding CSS rules to the CSS stylesheet.

        :return: None
        """
        for css_rule in self.css_rules:
            self.css_stylesheet.add(rule=css_rule)

    def get_css_text(self):
        """
        :return: The ``cssText`` of the built stylesheet.
        """
        return self.css_stylesheet.cssText
示例#12
0
 def get_media_requests(self, item, info):
     '''
         Build one ``Request`` for every url that cssutils finds in the
         css text stored under ``item['content']``.

         ``info`` is accepted but not used.
     '''
     stylesheet = CSSStyleSheet()
     stylesheet.cssText = item['content']
     pending = []
     for found_url in cssutils.getUrls(stylesheet):
         pending.append(Request(found_url))
     return pending