def perform(self, document, sourceHTML, sourceURL, encoding=None, copy_style=False):
    """Inline the document's CSS into per-element ``style`` attributes.

    Linked stylesheets are downloaded and ``<style>`` elements are read; both
    are removed from the tree and their CSS is applied to matching elements
    through ``self.get_view``.

    Args:
        document: Parsed lxml tree. It is modified in place.
        sourceHTML: Not used by this method; kept for interface compatibility.
        sourceURL: URL the document came from. When non-empty, stylesheet
            hrefs that do not contain ``http://`` are joined onto its
            scheme and hostname.
        encoding: Passed to ``etree.tostring``. ``None`` lets lxml emit
            ASCII bytes, which are then decoded as ASCII.
        copy_style: When true, the rules in ``self.used_rules`` are also
            written into a ``<style>`` element inserted as the first child
            of ``<body>``.

    Returns:
        ``self``; the serialised result is stored in ``self.convertedHTML``.

    Raises:
        IOError: If a linked stylesheet cannot be retrieved.
    """
    aggregate_css = ""

    # Retrieve CSS rel links from the pasted html and aggregate into one string
    link_selector = CSSSelector("link[rel=stylesheet],link[rel=StyleSheet],link[rel=STYLESHEET]")
    for element in link_selector.evaluate(document):
        href = element.get("href")
        try:
            csspath = href
            if sourceURL and "http://" not in href.lower():
                # NOTE(review): the base is scheme + hostname only, so the
                # page path and any port in sourceURL are ignored.
                parsed_url = urlparse.urlparse(sourceURL)
                csspath = urlparse.urljoin(parsed_url.scheme + "://" + parsed_url.hostname, csspath)
            aggregate_css += requests.get(csspath).text
            element.getparent().remove(element)
        except Exception:
            # "%s" keeps the message intact even when the link has no href.
            raise IOError('The stylesheet %s could not be found' % (href,))

    # Include inline style elements
    style_selector = CSSSelector("style,Style")
    for element in style_selector.evaluate(document):
        if element.text:
            aggregate_css += element.text
        element.getparent().remove(element)

    # Convert document to a style dictionary compatible with etree
    styledict = self.get_view(document, aggregate_css)

    # Set inline style attribute, skipping elements not worth styling
    ignore_list = ['html', 'head', 'title', 'meta', 'link', 'script']
    for element, style in styledict.items():
        if element.tag not in ignore_list:
            element.set('style', style.getCssText(separator=u''))

    if copy_style:
        # self.used_rules is expected to be populated elsewhere on the instance.
        sheet = cssutils.css.CSSStyleSheet()
        for rule in self.used_rules:
            sheet.add(rule)
        style = etree.HTML("<style>" + sheet.cssText + "</style>")
        document.find('.//body').insert(0, style.find(".//style"))

    serialized = etree.tostring(document, method="xml", pretty_print=True, encoding=encoding)
    if isinstance(serialized, bytes):
        # encoding=None makes lxml emit ASCII with character references;
        # a text result (encoding='unicode') needs no decoding at all.
        serialized = serialized.decode(encoding or 'ascii')
    # XML serialisation writes carriage returns as "&#13;"; drop them so the
    # output has plain line breaks (previously every space was removed).
    self.convertedHTML = serialized.replace('&#13;', '')
    return self
def perform(self, document, sourceHTML, sourceURL, encoding=None):
    """Inline the document's CSS into per-element ``style`` attributes.

    Linked stylesheets are downloaded, their ``url(...)`` references are
    rewritten through ``Conversion.replace_url``, and ``<style>`` elements
    are read; links and style elements are removed from the tree and the
    collected CSS is applied through ``self.get_view``.

    Args:
        document: Parsed lxml tree. It is modified in place.
        sourceHTML: Not used by this method; kept for interface compatibility.
        sourceURL: URL the document came from. When non-empty, stylesheet
            hrefs that do not contain ``http://`` are joined onto its
            scheme and hostname.
        encoding: Passed to ``etree.tostring``. ``None`` lets lxml emit
            ASCII bytes, which are then decoded as ASCII.

    Returns:
        ``self``; the serialised result is stored in ``self.convertedHTML``.

    Raises:
        IOError: If a linked stylesheet cannot be retrieved or processed.
    """
    aggregate_css = ""

    # Retrieve CSS rel links from the pasted html and aggregate into one string
    link_selector = CSSSelector("link[rel=stylesheet],link[rel=StyleSheet],link[rel=STYLESHEET]")
    for element in link_selector.evaluate(document):
        href = element.get("href")
        try:
            csspath = href
            if sourceURL and "http://" not in href.lower():
                # NOTE(review): the base is scheme + hostname only, so the
                # page path and any port in sourceURL are ignored.
                parsed_url = urlparse.urlparse(sourceURL)
                csspath = urlparse.urljoin(parsed_url.scheme + "://" + parsed_url.hostname, csspath)
            css_content = requests.get(csspath).text

            # Replace relative urls inside the css file with absolute ones,
            # resolved against the stylesheet's own location.
            css_content = Conversion.REGEX_URL.sub(
                lambda match_obj, base=csspath: Conversion.replace_url(base, match_obj),
                css_content)

            aggregate_css += css_content
            element.getparent().remove(element)
        except Exception:
            # "%s" keeps the message intact even when the link has no href.
            raise IOError('The stylesheet %s could not be found' % (href,))

    # Include inline style elements
    style_selector = CSSSelector("style,Style")
    for element in style_selector.evaluate(document):
        # An empty <style></style> has text None; skip it instead of failing.
        if element.text:
            aggregate_css += element.text
        element.getparent().remove(element)

    # Convert document to a style dictionary compatible with etree
    styledict = self.get_view(document, aggregate_css)

    # Set inline style attribute, skipping elements not worth styling
    ignore_list = ['html', 'head', 'title', 'meta', 'link', 'script']
    for element, style in styledict.items():
        if element.tag not in ignore_list:
            element.set('style', style.getCssText(separator=u''))

    serialized = etree.tostring(document, method="xml", pretty_print=True, encoding=encoding)
    if isinstance(serialized, bytes):
        # encoding=None makes lxml emit ASCII with character references;
        # a text result (encoding='unicode') needs no decoding at all.
        serialized = serialized.decode(encoding or 'ascii')
    # XML serialisation writes carriage returns as "&#13;"; drop them so the
    # output has plain line breaks (previously every space was removed).
    self.convertedHTML = serialized.replace('&#13;', '')
    return self
def perform(self, document, sourceHTML, sourceURL, encoding='unicode', remove_origin=True):
    """Inline the document's CSS into per-element ``style`` attributes.

    Linked stylesheets are downloaded and ``<style>`` elements are read; the
    collected CSS is applied to matching elements through ``self.get_view``
    and the tree is serialised as HTML.

    Args:
        document: Parsed lxml tree. It is modified in place.
        sourceHTML: Not used by this method; kept for interface compatibility.
        sourceURL: URL the document came from. When non-empty, stylesheet
            hrefs that do not contain ``http://`` are joined onto its
            scheme and hostname.
        encoding: Passed unchanged to ``etree.tostring``.
        remove_origin: When true, the ``<link>`` and ``<style>`` elements
            whose CSS was collected are removed from the tree.

    Returns:
        ``self``; the serialised result is stored in ``self.convertedHTML``.

    Raises:
        IOError: If a linked stylesheet cannot be retrieved.
    """
    aggregate_css = ""

    # Retrieve CSS rel links from the pasted html and aggregate into one string
    link_selector = CSSSelector("link[rel=stylesheet],link[rel=StyleSheet],link[rel=STYLESHEET]")
    for element in link_selector.evaluate(document):
        href = element.get("href")
        try:
            csspath = href
            if sourceURL and "http://" not in href.lower():
                # NOTE(review): the base is scheme + hostname only, so the
                # page path and any port in sourceURL are ignored.
                parsed_url = urlparse.urlparse(sourceURL)
                csspath = urlparse.urljoin(parsed_url.scheme + "://" + parsed_url.hostname, csspath)

            # Get css file. Don't verify SSL certificates. It's just a css.
            # Cloudfront does not have a correct SSL certificate and it fails.
            aggregate_css += requests.get(csspath, verify=False).text

            if remove_origin:
                element.getparent().remove(element)
        except Exception:
            # "%s" keeps the message intact even when the link has no href.
            raise IOError('The stylesheet %s could not be found' % (href,))

    # Include inline style elements
    style_selector = CSSSelector("style,Style")
    for element in style_selector.evaluate(document):
        # An empty <style></style> has text None; skip it instead of failing.
        if element.text:
            aggregate_css += element.text
        if remove_origin:
            element.getparent().remove(element)

    # Convert document to a style dictionary compatible with etree
    styledict = self.get_view(document, aggregate_css)

    # Set inline style attribute, skipping elements not worth styling
    ignore_list = ['html', 'head', 'title', 'meta', 'link', 'script']
    for element, style in styledict.items():
        if element.tag not in ignore_list:
            element.set('style', style.getCssText(separator=''))

    self.convertedHTML = etree.tostring(document, method="html", pretty_print=True, encoding=encoding)
    return self
def perform(self, document, sourceHTML, sourceURL):
    """Inline the document's CSS into per-element ``style`` attributes.

    Linked stylesheets are fetched with ``urlopen`` and ``<style>`` elements
    are read; both are removed from the tree and their CSS is applied to
    matching elements through ``self.getView``. The result is serialised as
    UTF-8 XML and stored as text.

    Args:
        document: Parsed lxml tree. It is modified in place.
        sourceHTML: Not used by this method; kept for interface compatibility.
        sourceURL: URL the document came from. When non-empty, stylesheet
            hrefs that do not contain ``http://`` are joined onto its
            scheme and hostname.

    Returns:
        ``self``; the serialised result is stored in ``self.convertedHTML``.

    Raises:
        IOError: If a linked stylesheet cannot be retrieved.
    """
    aggregateCSS = ""

    # retrieve CSS rel links from the pasted html and aggregate into one string
    linkSelector = CSSSelector("link[rel=stylesheet],link[rel=StyleSheet],link[rel=STYLESHEET]")
    for element in linkSelector.evaluate(document):
        href = element.get("href")
        try:
            csspath = href
            if sourceURL and "http://" not in href.lower():
                # NOTE(review): the base is scheme + hostname only, so the
                # page path and any port in sourceURL are ignored.
                parsedUrl = urlparse.urlparse(sourceURL)
                csspath = urlparse.urljoin(parsedUrl.scheme + "://" + parsedUrl.hostname, csspath)

            response = urlopen(csspath)
            try:
                rawCSS = response.read()
            finally:
                # always release the connection, even when reading fails
                response.close()
            if not isinstance(rawCSS, str):
                # a bytes payload (Python 3) must be decoded before it can be
                # appended to the text accumulator
                rawCSS = rawCSS.decode('utf-8', 'replace')
            aggregateCSS += rawCSS

            element.getparent().remove(element)
        except Exception:
            # "%s" keeps the message intact even when the link has no href.
            raise IOError('The stylesheet %s could not be found' % (href,))

    # include inline style elements
    styleSelector = CSSSelector("style,Style")
    for element in styleSelector.evaluate(document):
        # an empty <style></style> has text None; skip it instead of failing
        if element.text:
            aggregateCSS += element.text
        element.getparent().remove(element)

    # convert document to a style dictionary compatible with etree
    styledict = self.getView(document, aggregateCSS)

    # set inline style attribute, skipping elements not worth styling
    ignoreList = ['html', 'head', 'title', 'meta', 'link', 'script']
    for element, style in styledict.items():
        if element.tag not in ignoreList:
            element.set('style', style.getCssText(separator=u''))

    # convert tree back to plain text html
    serialized = etree.tostring(document, method="xml", pretty_print=True, encoding='UTF-8')
    # XML serialisation writes carriage returns as "&#13;"; drop them so the
    # output has plain line breaks (previously every space was removed).
    self.convertedHTML = serialized.decode('utf-8').replace('&#13;', '')
    return self
def perform(self, document, sourceHTML, sourceURL, encoding='unicode'):
    """Inline the document's CSS into per-element ``style`` attributes.

    Linked stylesheets are loaded (over HTTP when the resolved path starts
    with ``http``, otherwise from the local filesystem) and ``<style>``
    elements are read; both are removed from the tree and their CSS is
    applied to matching elements through ``self.get_view``.

    Args:
        document: Parsed lxml tree. It is modified in place.
        sourceHTML: Not used by this method; kept for interface compatibility.
        sourceURL: URL the document came from. When non-empty, stylesheet
            hrefs that do not contain ``http://`` are joined onto its
            scheme and hostname.
        encoding: Passed unchanged to ``etree.tostring``.

    Returns:
        ``self``; the serialised result is stored in ``self.convertedHTML``.

    Raises:
        IOError: If a linked stylesheet cannot be retrieved or read.
    """
    aggregate_css = ""

    # Retrieve CSS rel links from the pasted html and aggregate into one string
    link_selector = CSSSelector(
        "link[rel=stylesheet],link[rel=StyleSheet],link[rel=STYLESHEET]")
    for element in link_selector.evaluate(document):
        href = element.get("href")
        try:
            csspath = href
            if sourceURL and "http://" not in href.lower():
                # NOTE(review): the base is scheme + hostname only, so the
                # page path and any port in sourceURL are ignored.
                parsed_url = urlparse.urlparse(sourceURL)
                csspath = urlparse.urljoin(
                    parsed_url.scheme + "://" + parsed_url.hostname, csspath)

            if csspath.startswith("http"):
                css_content = requests.get(csspath).text
            else:
                # Local stylesheet: read it fully and close the handle.
                with open(csspath, "r") as css_file:
                    css_content = css_file.read()
            aggregate_css += css_content

            element.getparent().remove(element)
        except Exception:
            # "%s" keeps the message intact even when the link has no href.
            raise IOError('The stylesheet %s could not be found' % (href,))

    # Include inline style elements
    style_selector = CSSSelector("style,Style")
    for element in style_selector.evaluate(document):
        # An empty <style></style> has text None; skip it instead of failing.
        if element.text:
            aggregate_css += element.text
        element.getparent().remove(element)

    # Convert document to a style dictionary compatible with etree
    styledict = self.get_view(document, aggregate_css)

    # Set inline style attribute, skipping elements not worth styling
    ignore_list = ['html', 'head', 'title', 'meta', 'link', 'script']
    for element, style in styledict.items():
        if element.tag not in ignore_list:
            element.set('style', style.getCssText(separator=''))

    self.convertedHTML = etree.tostring(document, method="xml", pretty_print=True, encoding=encoding)
    return self
def get_view(self, document, css):
    """Compute the cascaded style declaration of every element matched by ``css``.

    Each style rule in ``css`` is matched against ``document``; for every
    matching element the winning declarations are collected, honouring
    existing inline styles, ``!important`` priority and selector
    specificity. While doing so the method records, per CSS property, which
    clients listed in ``css_compliance.csv`` lack support for it.

    Args:
        document: Parsed lxml tree that the selectors are evaluated against.
        css: Stylesheet text to apply.

    Returns:
        dict mapping each matched element to a
        ``cssutils.css.CSSStyleDeclaration`` with its resulting style.

    Side effects:
        Appends to ``self.CSSErrors`` (selectors that could not be
        evaluated), fills ``self.CSSUnsupportErrors`` (property name ->
        list of clients) and sets ``self.supportPercentage`` when at least
        one unsupported usage was counted.
    """
    view = {}
    specificities = {}
    supportratios = {}
    support_failrate = 0
    support_totalrate = 0
    compliance = dict()
    # Stays 0 when the compliance table has no data rows, instead of being
    # left unbound.
    client_count = 0

    csv_path = os.path.join(os.path.dirname(__file__), "css_compliance.csv")
    with open(csv_path) as csv_file:
        for row in csv.DictReader(csv_file, delimiter=','):
            # Count clients so an overall support percentage can be computed
            # later; the first column is the property name, not a client.
            client_count = len(row) - 1
            compliance[row['property'].strip()] = dict(row)

    sheet = cssutils.parseString(css)
    for rule in sheet:
        if rule.type != rule.STYLE_RULE:
            continue
        for selector in rule.selectorList:
            try:
                matching = CSSSelector(selector.selectorText).evaluate(document)
                for element in matching:
                    # add styles for all matching DOM elements
                    if element not in view:
                        # add initial, seeded with the inline style if present
                        view[element] = cssutils.css.CSSStyleDeclaration()
                        specificities[element] = {}
                        inlinestyletext = element.get('style')
                        if inlinestyletext:
                            inlinestyle = cssutils.css.CSSStyleDeclaration(cssText=inlinestyletext)
                            for p in inlinestyle:
                                # inline declarations outrank any selector
                                view[element].setProperty(p)
                                specificities[element][p.name] = (1, 0, 0, 0)

                    declaration = view[element]
                    element_specificities = specificities[element]
                    for p in rule.style:
                        ratio = supportratios.setdefault(
                            p.name, {'usage': 0, 'failedClients': 0})
                        ratio['usage'] += 1

                        # Client failures are collected only the first time a
                        # property is seen; properties missing from the
                        # compliance table are skipped.
                        client_support = compliance.get(p.name)
                        if client_support is not None and p.name not in self.CSSUnsupportErrors:
                            for client, support in client_support.items():
                                if support != "N" and support != "P":
                                    continue
                                # Increment client failure count for this property
                                ratio['failedClients'] += 1
                                if support == "P":
                                    label = client + ' (partial support)'
                                else:
                                    label = client
                                self.CSSUnsupportErrors.setdefault(p.name, []).append(label)

                        # update styles
                        if p not in declaration:
                            declaration.setProperty(p.name, p.value, p.priority)
                            element_specificities[p.name] = selector.specificity
                        else:
                            sameprio = (p.priority == declaration.getPropertyPriority(p.name))
                            if (not sameprio and bool(p.priority)) or (
                                    sameprio and selector.specificity >= element_specificities[p.name]):
                                # later, more specific or higher prio
                                declaration.setProperty(p.name, p.value, p.priority)
                                # Remember the winner's specificity so that a
                                # later, less specific rule cannot override it.
                                element_specificities[p.name] = selector.specificity
            except ExpressionError as error:
                message = str(error)
                if message not in self.CSSErrors:
                    self.CSSErrors.append(message)

    for propvals in supportratios.values():
        support_failrate += propvals['usage'] * int(propvals['failedClients'])
        support_totalrate += int(propvals['usage']) * client_count
    if support_failrate and support_totalrate:
        self.supportPercentage = 100 - ((float(support_failrate) / float(support_totalrate)) * 100)
    return view
def get_view(self, document, css):
    """Compute the cascaded style declaration of every element matched by ``css``.

    Each style rule in ``css`` is matched against ``document``; for every
    matching element the winning declarations are collected, honouring
    existing inline styles, ``!important`` priority and selector
    specificity. While doing so the method records, per CSS property, which
    clients listed in ``css_compliance.csv`` lack support for it.

    Args:
        document: Parsed lxml tree that the selectors are evaluated against.
        css: Stylesheet text to apply.

    Returns:
        dict mapping each matched element to a
        ``cssutils.css.CSSStyleDeclaration`` with its resulting style.

    Side effects:
        Appends to ``self.CSSErrors`` (selectors that could not be
        evaluated), fills ``self.CSSUnsupportErrors`` (property name ->
        list of clients) and sets ``self.supportPercentage`` when at least
        one unsupported usage was counted.
    """
    view = {}
    specificities = {}
    supportratios = {}
    support_failrate = 0
    support_totalrate = 0
    compliance = dict()
    # Stays 0 when the compliance table has no data rows, instead of being
    # left unbound.
    client_count = 0

    csv_path = os.path.join(os.path.dirname(__file__), "css_compliance.csv")
    with open(csv_path) as csv_file:
        compat_list = csv_file.readlines()
    for row in csv.DictReader(compat_list, delimiter=str(',')):
        # Count clients so an overall support percentage can be computed
        # later; the first column is the property name, not a client.
        client_count = len(row) - 1
        compliance[row['property'].strip()] = dict(row)

    sheet = cssutils.parseString(css)
    for rule in sheet:
        if rule.type != rule.STYLE_RULE:
            continue
        for selector in rule.selectorList:
            try:
                matching = CSSSelector(selector.selectorText).evaluate(document)
                for element in matching:
                    # add styles for all matching DOM elements
                    if element not in view:
                        # add initial, seeded with the inline style if present
                        view[element] = cssutils.css.CSSStyleDeclaration()
                        specificities[element] = {}
                        inlinestyletext = element.get('style')
                        if inlinestyletext:
                            inlinestyle = cssutils.css.CSSStyleDeclaration(
                                cssText=inlinestyletext)
                            for p in inlinestyle:
                                # inline declarations outrank any selector
                                view[element].setProperty(p)
                                specificities[element][p.name] = (1, 0, 0, 0)

                    declaration = view[element]
                    element_specificities = specificities[element]
                    for p in rule.style:
                        ratio = supportratios.setdefault(
                            p.name, {'usage': 0, 'failedClients': 0})
                        ratio['usage'] += 1

                        # Client failures are collected only the first time a
                        # property is seen; properties missing from the
                        # compliance table are skipped.
                        client_support = compliance.get(p.name)
                        if (client_support is not None
                                and p.name not in self.CSSUnsupportErrors):
                            for client, support in client_support.items():
                                if support != "N" and support != "P":
                                    continue
                                # Increment client failure count for this property
                                ratio['failedClients'] += 1
                                if support == "P":
                                    label = client + ' (partial support)'
                                else:
                                    label = client
                                self.CSSUnsupportErrors.setdefault(
                                    p.name, []).append(label)

                        # update styles
                        if p not in declaration:
                            declaration.setProperty(p.name, p.value, p.priority)
                            element_specificities[p.name] = selector.specificity
                        else:
                            sameprio = (p.priority ==
                                        declaration.getPropertyPriority(p.name))
                            if (not sameprio and bool(p.priority)) or (
                                    sameprio and selector.specificity >=
                                    element_specificities[p.name]):
                                # later, more specific or higher prio
                                declaration.setProperty(p.name, p.value, p.priority)
                                # Remember the winner's specificity so that a
                                # later, less specific rule cannot override it.
                                element_specificities[p.name] = selector.specificity
            except ExpressionError as error:
                message = str(error)
                if message not in self.CSSErrors:
                    self.CSSErrors.append(message)

    for propvals in supportratios.values():
        support_failrate += propvals['usage'] * int(propvals['failedClients'])
        support_totalrate += int(propvals['usage']) * client_count
    if support_failrate and support_totalrate:
        self.supportPercentage = 100 - (
            (float(support_failrate) / float(support_totalrate)) * 100)
    return view