Exemplo n.º 1
0
    def test_parentRule(self):
        """CSSStyleDeclaration.parentRule

        Checks that ``parentRule`` can be assigned explicitly and that
        declarations obtained from parsed sheets point back to the rule
        that owns them.
        """
        # explicit assignment is readable again
        declaration = cssutils.css.CSSStyleDeclaration()
        owner = cssutils.css.CSSStyleRule()
        declaration.parentRule = owner
        self.assertEqual(owner, declaration.parentRule)

        # a parsed style rule owns its declaration
        first_rule = cssutils.parseString(u'a{x:1}').cssRules[0]
        self.assertEqual(first_rule, first_rule.style.parentRule)

        # every rule of a mixed sheet owns its declaration
        parsed = cssutils.parseString('''
        @font-face {
            font-weight: bold;
            }
        a {
            font-weight: bolder;
            }
        @page {
            font-weight: bolder;    
            }
        ''')
        for rule in parsed:
            self.assertEqual(rule.style.parentRule, rule)
    def test_cssRules(self):
        """CSSMediaRule.cssRules

        Covers the empty default, appending an accepted and a rejected
        rule type, and list-style mutation (del / append / extend) of the
        rules of a parsed ``@media`` block, including the parent links of
        the resulting rules.
        """
        media = cssutils.css.CSSMediaRule()
        self.assertEqual([], media.cssRules)
        style_rule = cssutils.css.CSSStyleRule()
        media.cssRules.append(style_rule)
        self.assertEqual([style_rule], media.cssRules)
        # appending an @import rule is expected to be rejected
        self.assertRaises(xml.dom.HierarchyRequestErr,
                          media.cssRules.append,
                          cssutils.css.CSSImportRule())

        sheet = cssutils.parseString('@media all { /*1*/a {x:1} }')
        parsed_media = sheet.cssRules[0]
        self.assertEqual(2, parsed_media.cssRules.length)
        del parsed_media.cssRules[0]
        self.assertEqual(1, parsed_media.cssRules.length)
        parsed_media.cssRules.append('/*2*/')
        self.assertEqual(2, parsed_media.cssRules.length)
        extra = cssutils.parseString('/*3*/x {y:2}')
        parsed_media.cssRules.extend(extra.cssRules)
        self.assertEqual(4, parsed_media.cssRules.length)

        expected = (u'@media all {\n'
                    u'    a {\n'
                    u'        x: 1\n'
                    u'        }\n'
                    u'    /*2*/\n'
                    u'    /*3*/\n'
                    u'    x {\n'
                    u'        y: 2\n'
                    u'        }\n'
                    u'    }')
        self.assertEqual(expected, parsed_media.cssText)

        # every contained rule must point at the sheet and the media rule
        for child in parsed_media.cssRules:
            self.assertEqual(child.parentStyleSheet, sheet)
            self.assertEqual(child.parentRule, parsed_media)
 def test_children(self):
     """CSSStyleDeclaration.children()

     Verifies the type and ``parent`` of every child of a declaration
     block, however the block was obtained, and that properties added
     afterwards get the same ``parent``.
     """
     declarations = u'/*1*/color: red; color: green; @x;'
     # (expected child type, name of the DEPRECATED back-reference or None)
     expected = [
         (cssutils.css.CSSComment, None),
         (cssutils.css.Property, 'parentStyle'),
         (cssutils.css.Property, 'parentStyle'),
         (cssutils.css.CSSUnknownRule, None),
     ]

     def check(block):
         for position, child in enumerate(block.children()):
             wanted_type, legacy_attr = expected[position]
             self.assertEqual(wanted_type, type(child))
             self.assertEqual(block, child.parent)
             if legacy_attr:
                 # DEPRECATED attribute must still point at the block
                 self.assertEqual(block, getattr(child, legacy_attr))

     check(cssutils.parseStyle(declarations))
     in_rule = cssutils.parseString(u'a {' + declarations + '}')
     check(in_rule.cssRules[0].style)
     in_media = cssutils.parseString(u'@media all {a {' + declarations + '}}')
     check(in_media.cssRules[0].cssRules[0].style)

     # properties added after parsing are parented as well
     block = cssutils.parseStyle(declarations)
     block['x'] = '0'
     self.assertEqual(block, block.getProperty('x').parent)
     block.setProperty('y', '1')
     self.assertEqual(block, block.getProperty('y').parent)
    def test_parsevalidation(self):
        # Parsing the value "color: 1" must write something to the
        # cssutils log unless validate=False is passed, both for whole
        # sheets and for bare style declarations.
        declaration = 'color: 1'
        sheet_source = 'a { %s }' % declaration

        cssutils.log.setLevel(logging.DEBUG)

        def logged(parse, source, **options):
            # self._setHandler() returns the stream that is inspected
            # after the parser has run.
            stream = self._setHandler()
            parse(source, **options)
            return stream.getvalue()

        # sheet
        self.assertNotEqual(
            len(logged(cssutils.parseString, sheet_source)), 0)
        self.assertEqual(
            logged(cssutils.parseString, sheet_source, validate=False), '')

        # style
        self.assertNotEqual(
            len(logged(cssutils.parseStyle, declaration)), 0)
        self.assertNotEqual(
            len(logged(cssutils.parseStyle, declaration, validate=True)), 0)
        self.assertEqual(
            logged(cssutils.parseStyle, declaration, validate=False), '')
Exemplo n.º 5
0
    def test_invalidstring(self):
        "cssutils.parseString(INVALID_STRING)"
        # Suffix appended to every broken at-rule below; the serialized
        # sheet is expected to consist of exactly this text.
        validfromhere = '@namespace "x";'
        # Each case opens a string (or url) inside an at-rule and breaks
        # the line before the string is closed.
        csss = (
            u'''@charset "ascii
                ;''' + validfromhere,
            u'''@charset 'ascii
                ;''' + validfromhere,
            u'''@namespace "y
                ;''' + validfromhere,
            u'''@import "y
                ;''' + validfromhere,
            u'''@import url('a
                );''' + validfromhere,
            u'''@unknown "y
                ;''' + validfromhere)
        for css in csss:
            s = cssutils.parseString(css)
            self.assertEqual(validfromhere, s.cssText)

        # Style rules containing an unterminated string in a property value
        # (line break or escaped closing quote) are expected to serialize
        # to an empty sheet.
        csss = (u'''a { font-family: "Courier
                ; }''',
                ur'''a { content: "\"; }
                ''',
                ur'''a { content: "\\\"; }
                '''
        )
        for css in csss:
            self.assertEqual(u'', cssutils.parseString(css).cssText)
Exemplo n.º 6
0
    def test_prioriy(self):
        "Property.priority"
        s = cssutils.parseString('a { color: red }')
        self.assertEqual(u'a {\n    color: red\n    }', s.cssText)
        self.assertEqual(u'', s.cssRules[0].style.getPropertyPriority('color'))

        s = cssutils.parseString('a { color: red !important }')
        self.assertEqual(u'a {\n    color: red !important\n    }', s.cssText)
        self.assertEqual(u'important', s.cssRules[0].style.getPropertyPriority('color'))
        
        # invalid but kept!
#        #cssutils.log.raiseExceptions = False
#        s = cssutils.parseString('a { color: red !x }')
#        self.assertEqual(u'a {\n    color: red !x\n    }', s.cssText)
#        self.assertEqual(u'x', s.cssRules[0].style.getPropertyPriority('color'))
        
        cssutils.log.raiseExceptions = True
        p = cssutils.css.Property(u'color', u'red', u'')
        self.assertEqual(p.priority, u'')
        p = cssutils.css.Property(u'color', u'red', u'!important')
        self.assertEqual(p.priority, u'important')
        self.assertRaisesMsg(xml.dom.SyntaxErr, 
                             u'', 
                             cssutils.css.Property, u'color', u'red', u'x')

        cssutils.log.raiseExceptions = False
        p = cssutils.css.Property(u'color', u'red', u'!x')
        self.assertEqual(p.priority, u'x')
        p = cssutils.css.Property(u'color', u'red', u'!x')
        self.assertEqual(p.priority, u'x')
        cssutils.log.raiseExceptions = True
Exemplo n.º 7
0
 def test_insufficient_partial_cascade(self):
     # wx._styles2dict is expected to raise ValueError for a cascade made
     # of these two body declarations (background colour, then colour).
     sources = (
         "body {background-color: #00ff00;}",
         "body {color: #0000ff;}",
     )
     cascade = [cssutils.parseString(text).cssRules[0].style
                for text in sources]

     self.assertRaises(ValueError, wx._styles2dict, cascade)
Exemplo n.º 8
0
    def test_resolveImports(self):
        "cssutils.resolveImports(sheet)"
        # The whole test depends on `mock`; when it is falsy the else
        # branch below fails the test with an explanatory message.
        if mock:
            self._tempSer()
            cssutils.ser.prefs.useMinified()

            # `a` imports b.css; `b` is the content the patched fetcher
            # hands back for that import.
            a = u'@charset "iso-8859-1";@import"b.css";ä{color:green}'.encode('iso-8859-1')
            b = u'@charset "ascii";\E4 {color:red}'.encode('ascii')
            
            # normal
            mock("cssutils.util._defaultFetcher", 
                 mock_obj=self._make_fetcher(None, b))
            s = cssutils.parseString(a)
            restore()            
            self.assertEqual(a, s.cssText)
            self.assertEqual(b, s.cssRules[1].styleSheet.cssText)
            c = cssutils.resolveImports(s)
            # imported rules come first, followed by the importing sheet's own
            self.assertEqual('\xc3\xa4{color:red}\xc3\xa4{color:green}', 
                             c.cssText)

            # switching the encoding re-serializes with escapes and @charset
            c.encoding = 'ascii'
            self.assertEqual(r'@charset "ascii";\E4 {color:red}\E4 {color:green}', 
                             c.cssText)

            # b cannot be found
            mock("cssutils.util._defaultFetcher", 
                 mock_obj=self._make_fetcher(None, None))
            s = cssutils.parseString(a)
            restore()            
            self.assertEqual(a, s.cssText)
            self.assertEqual(None, s.cssRules[1].styleSheet)
            c = cssutils.resolveImports(s)
            # the unresolved @import is expected to stay in the output
            self.assertEqual('@import"b.css";\xc3\xa4{color:green}', 
                             c.cssText)

            # @import with media
            a = u'@import"b.css";@import"b.css" print, tv ;@import"b.css" all;'
            b = u'a {color: red}'            
            mock("cssutils.util._defaultFetcher", 
                 mock_obj=self._make_fetcher(None, b))
            s = cssutils.parseString(a)
            restore()            
            c = cssutils.resolveImports(s)
            # only the import restricted to "print, tv" is wrapped in @media
            self.assertEqual('a{color:red}@media print,tv{a{color:red}}a{color:red}', 
                             c.cssText)
            
            # cannot resolve with media => keep original
            a = u'@import"b.css"print;'
            b = u'@namespace "http://example.com";'            
            mock("cssutils.util._defaultFetcher", 
                 mock_obj=self._make_fetcher(None, b))
            s = cssutils.parseString(a)
            restore()            
            c = cssutils.resolveImports(s)
            self.assertEqual(a, c.cssText)


            
        else:
            # deliberate failure: comparing False with the message never passes
            self.assertEqual(False, u'Minimock needed for this test')
Exemplo n.º 9
0
    def test_attributes(self):
        """cssutils.parseString(href, media)

        ``href`` is stored on the sheet, and ``media`` given either as one
        string or as a list of names yields the same ``mediaText``.
        """
        href = "file:foo.css"
        media_text = "screen, projection, tv"

        sheet = cssutils.parseString("a{}", href=href, media=media_text)
        self.assertEqual(sheet.href, href)
        self.assertEqual(sheet.media.mediaText, media_text)

        sheet = cssutils.parseString(
            "a{}", href=href, media=["screen", "projection", "tv"])
        self.assertEqual(sheet.media.mediaText, media_text)
Exemplo n.º 10
0
 def test_CSSStyleSheet(self):
     """CSSSerializer.do_CSSStyleSheet

     Non-ASCII comment text must survive serialization when decoded as
     UTF-8, and must be escaped once the charset rule is set to ASCII.
     """
     sources = (
         u'/* κουρος */',
         u'@charset "utf-8";\n/* κουρος */',
     )
     for source in sources:
         sheet = cssutils.parseString(source)
         self.assertEqual(source, unicode(sheet.cssText, 'utf-8'))

     # `sheet` is the one that starts with the @charset rule
     sheet.cssRules[0].encoding = 'ascii'
     escaped = '@charset "ascii";\n/* \\3BA \\3BF \\3C5 \\3C1 \\3BF \\3C2  */'
     self.assertEqual(escaped.encode(), sheet.cssText)
Exemplo n.º 11
0
    def test_set(self):
        """settings.set()

        The ``progid:DXImageTransform.Microsoft`` filter value serializes
        to nothing by default and is kept once the matching setting has
        been switched on.
        """
        cssutils.ser.prefs.useMinified()
        text = u'a {filter: progid:DXImageTransform.Microsoft.BasicImage( rotation = 90 )}'

        def serialized():
            # parse afresh each time so the current setting is applied
            return cssutils.parseString(text).cssText

        self.assertEqual(serialized(), ''.encode())

        cssutils.settings.set('DXImageTransform.Microsoft', True)
        kept = 'a{filter:progid:DXImageTransform.Microsoft.BasicImage(rotation=90)}'
        self.assertEqual(serialized(), kept.encode())

        cssutils.ser.prefs.useDefaults()
Exemplo n.º 12
0
    def test_roundtrip(self):
        "cssutils encodings"
        # Parse, serialize and decode as UTF-8: the text must be unchanged.
        css1 = ur'''@charset "utf-8";
/* ä */'''
        s = cssutils.parseString(css1)
        css2 = unicode(s.cssText, 'utf-8')
        self.assertEqual(css1, css2)

        # Changing the encoding of the @charset rule (first rule) to ASCII
        # must rewrite the rule and escape the umlaut in the comment.
        s = cssutils.parseString(css2)
        s.cssRules[0].encoding='ascii'
        css3 = ur'''@charset "ascii";
/* \E4  */'''
        self.assertEqual(css3, unicode(s.cssText, 'utf-8'))
Exemplo n.º 13
0
 def test_partial_cascade(self):
     # wx._styles2dict must merge both declarations into a single dict
     # with colours as RGB tuples and the font family as a list.
     sources = (
         "body {background-color: #00ff00; "
         "font-family: monospace;}",
         "body {color: #0000ff;}",
     )
     cascade = [cssutils.parseString(text).cssRules[0].style
                for text in sources]

     expected = {
         'color': (0, 0, 255),
         'background-color': (0, 255, 0),
         'font-family': ['monospace'],
     }
     self.assertEqual(wx._styles2dict(cascade), expected)
Exemplo n.º 14
0
    def test_escapes(self):
        "cssutils escapes"
        # Hex escapes (\43 is "C", \41 is "A") are expected to be resolved
        # on output while the literal "\x" escape is kept.
        css = ur'\43\x { \43\x: \43\x !import\41nt }'
        sheet = cssutils.parseString(css)
        self.assertEqual(sheet.cssText, ur'''C\x {
    c\x: C\x !important
    }''')

        # An escaped space must survive in selector, property name and value.
        css = ur'\ x{\ x :\ x ;y:1} '
        sheet = cssutils.parseString(css)
        self.assertEqual(sheet.cssText, ur'''\ x {
    \ x: \ x;
    y: 1
    }''')
Exemplo n.º 15
0
 def test_propertyNameSpacer(self):
     """Preferences.propertyNameSpacer

     The text written after the colon of a property name follows the
     serializer preference: a space by default, nothing once it is set
     to the empty string.
     """
     sheet = cssutils.parseString('a { x: 1; y: 2 }')

     spaced = u'a {\n    x: 1;\n    y: 2\n    }'
     self.assertEqual(spaced.encode(), sheet.cssText)

     cssutils.ser.prefs.propertyNameSpacer = u''
     compact = u'a {\n    x:1;\n    y:2\n    }'
     self.assertEqual(compact.encode(), sheet.cssText)
Exemplo n.º 16
0
 def test_omitLastSemicolon(self):
     """Preferences.omitLastSemicolon

     The last declaration of a block has no trailing semicolon by
     default and gets one once the preference is switched off.
     """
     sheet = cssutils.parseString('a { x: 1; y: 2 }')

     without_last = u'a {\n    x: 1;\n    y: 2\n    }'
     self.assertEqual(without_last.encode(), sheet.cssText)

     cssutils.ser.prefs.omitLastSemicolon = False
     with_last = u'a {\n    x: 1;\n    y: 2;\n    }'
     self.assertEqual(with_last.encode(), sheet.cssText)
Exemplo n.º 17
0
 def test_keepUsedNamespaceRulesOnly(self):
     """Preferences.keepUsedNamespaceRulesOnly

     Maps each source sheet to a pair of expected serializations: first
     with the preference off (all @namespace rules kept), then with it
     on (only the namespaces the selectors use are kept).
     """
     cases = {
         # default == prefix => both are combined
         '@namespace p "u"; @namespace "u"; p|a, a {top: 0}':
             ('@namespace "u";\na, a {\n    top: 0\n    }',
              '@namespace "u";\na, a {\n    top: 0\n    }'),
         '@namespace "u"; @namespace p "u"; p|a, a {top: 0}':
             ('@namespace p "u";\np|a, p|a {\n    top: 0\n    }',
              '@namespace p "u";\np|a, p|a {\n    top: 0\n    }'),
         # default and prefix
         '@namespace p "u"; @namespace "d"; p|a, a {top: 0}':
             ('@namespace p "u";\n@namespace "d";\np|a, a {\n    top: 0\n    }',
              '@namespace p "u";\n@namespace "d";\np|a, a {\n    top: 0\n    }'),
         # prefix only
         '@namespace p "u"; @namespace "d"; p|a {top: 0}':
             ('@namespace p "u";\n@namespace "d";\np|a {\n    top: 0\n    }',
              '@namespace p "u";\np|a {\n    top: 0\n    }'),
         # default only
         '@namespace p "u"; @namespace "d"; a {top: 0}':
             ('@namespace p "u";\n@namespace "d";\na {\n    top: 0\n    }',
              '@namespace "d";\na {\n    top: 0\n    }'),
         # prefix-ns only
         '@namespace p "u"; @namespace d "d"; p|a {top: 0}':
             ('@namespace p "u";\n@namespace d "d";\np|a {\n    top: 0\n    }',
              '@namespace p "u";\np|a {\n    top: 0\n    }'),
     }
     for source, (keep_all, keep_used) in cases.items():
         sheet = cssutils.parseString(source)
         cssutils.ser.prefs.keepUsedNamespaceRulesOnly = False
         self.assertEqual(sheet.cssText, keep_all.encode())
         cssutils.ser.prefs.keepUsedNamespaceRulesOnly = True
         self.assertEqual(sheet.cssText, keep_used.encode())
Exemplo n.º 18
0
    def test_keepUnknownAtRules(self):
        "Preferences.keepUnknownAtRules"
        # Maps a source sheet to (serialization with the unknown at-rule
        # kept, serialization with it dropped).
        tests = {
            u'''@three-dee {
              @background-lighting {
                azimuth: 30deg;
                elevation: 190deg;
              }
              h1 { color: red }
            }
            h1 { color: blue }''': (u'''@three-dee {
    @background-lighting {
        azimuth: 30deg;
        elevation: 190deg;
        } h1 {
        color: red
        }
    }
h1 {
    color: blue
    }''', u'''h1 {
    color: blue
    }''')
        }
        for test in tests:
            s = cssutils.parseString(test)
            expwith, expwithout = tests[test]
            # The same parsed sheet is serialized twice; only the serializer
            # preference changes in between.
            cssutils.ser.prefs.keepUnknownAtRules = True
            self.assertEqual(s.cssText, expwith.encode())
            cssutils.ser.prefs.keepUnknownAtRules = False
            self.assertEqual(s.cssText, expwithout.encode())
Exemplo n.º 19
0
 def test_keepComments(self):
     """Preferences.keepComments

     With comments dropped the sheet serializes to nothing; additionally
     keeping empty rules leaves only the bare ``a {}`` rule.
     """
     sheet = cssutils.parseString('/*1*/ a { /*2*/ }')

     cssutils.ser.prefs.keepComments = False
     nothing = ''
     self.assertEqual(nothing.encode(), sheet.cssText)

     cssutils.ser.prefs.keepEmptyRules = True
     empty_rule = 'a {}'
     self.assertEqual(empty_rule.encode(), sheet.cssText)
Exemplo n.º 20
0
def html_css_stylesheet():
    """Return the parsed ``html.css`` stylesheet, loading it on first use.

    On the first call the file ``html.css`` located next to this module is
    read as bytes, parsed without validation, and given the namespace
    prefix ``h`` bound to ``XHTML_NS``.  The result is cached in the
    module-level ``_html_css_stylesheet`` and returned by later calls.

    :return: the cached stylesheet object produced by ``parseString``.
    """
    global _html_css_stylesheet
    if _html_css_stylesheet is None:
        css_path = os.path.join(os.path.dirname(__file__), 'html.css')
        # Close the handle deterministically instead of leaving it to the
        # garbage collector.
        with open(css_path, 'rb') as stream:
            html_css = stream.read()
        sheet = parseString(html_css, validate=False)
        sheet.namespaces['h'] = XHTML_NS
        # Publish the sheet only once it is fully set up.
        _html_css_stylesheet = sheet
    return _html_css_stylesheet
Exemplo n.º 21
0
Arquivo: DFT.py Projeto: ml31415/thug
    def handle_style(self, style):
        """Dispatch every ``@font-face`` rule found in a style element.

        The element is logged, its ``text`` is parsed as a stylesheet, and
        each rule whose type is ``FONT_FACE_RULE`` is passed to
        ``do_handle_font_face_rule``; all other rules are ignored.
        """
        log.info(style)

        for rule in cssutils.parseString(style.text):
            if rule.type != rule.FONT_FACE_RULE:
                continue
            self.do_handle_font_face_rule(rule)
Exemplo n.º 22
0
 def getStyleElementRules(self, htmltext):
     """Return a stylesheet built from the ``<style>`` elements of htmltext.

     The text of every ``<style>`` element is concatenated in document
     order and parsed as one sheet.  Elements without text are reported
     at debug level and skipped.

     As a side effect the global serializer preferences on
     ``cssutils.ser.prefs`` are changed (closing brace not indented,
     comments dropped, empty line separator, last semicolon kept).

     :param htmltext: HTML markup to scan.
     :return: the parsed stylesheet, or an empty ``CSSStyleSheet`` when
         ``cssutils.parseString`` raises ``ValueError``.
     """
     stylesheet = cssutils.css.CSSStyleSheet()
     myparser = etree.HTMLParser(encoding="utf-8")
     tree = etree.HTML(htmltext, parser=myparser)
     styleelements = tree.xpath('//style')

     chunks = []
     # enumerate() gives the 1-based position directly, avoiding a list
     # search for every empty element.
     for position, styleelt in enumerate(styleelements, 1):
         if styleelt.text is not None:
             chunks.append(styleelt.text)
         else:
             logging.debug("STYLE ELEMENT %s on %s" % (position, len(styleelements)))
     compiledstyle = "".join(chunks)

     # The accumulated text is always a string (possibly empty), so the
     # former "no CSS Rules" ValueError branch could never run and has
     # been removed; an empty string is parsed like any other input.
     cssutils.ser.prefs.indentClosingBrace = False
     cssutils.ser.prefs.keepComments = False
     cssutils.ser.prefs.lineSeparator = u''
     cssutils.ser.prefs.omitLastSemicolon = False
     try:
         stylesheet = cssutils.parseString(compiledstyle)
     except ValueError as e:
         # Format the exception itself: ``e.message`` is not available on
         # Python 3.
         logging.info("BOGUS STYLE RULE: %s" % (e,))
     return stylesheet
Exemplo n.º 23
0
def main():
    """Demonstrate parsing, editing and serializing a sheet with cssutils.

    Parses a small sheet, changes the ``color`` property of every style
    rule to green with an ``IMPORTANT`` priority, sets a ``margin`` on each
    style rule, switches the sheet encoding to ASCII, registers two
    namespace prefixes, appends two more rules and prints the result.
    """
    # A coding cookie is only honoured on the first two lines of a file,
    # so the one that used to sit here had no effect and was dropped.
    import cssutils
    import logging
    cssutils.log.setLevel(logging.DEBUG)

    css = u'''/* a comment with umlaut ä */
         @namespace html "http://www.w3.org/1999/xhtml";
         @variables { BG: #fff }
         html|a { color:red; background: var(BG) }'''
    sheet = cssutils.parseString(css)

    for rule in sheet:
        if rule.type == rule.STYLE_RULE:
            # find property (named ``prop`` so the ``property`` builtin is
            # not shadowed)
            for prop in rule.style:
                if prop.name == 'color':
                    prop.value = 'green'
                    prop.priority = 'IMPORTANT'
                    break
            # or simply:
            rule.style['margin'] = '01.0eM' # or: ('1em', 'important')

    sheet.encoding = 'ascii'
    sheet.namespaces['xhtml'] = 'http://www.w3.org/1999/xhtml'
    sheet.namespaces['atom'] = 'http://www.w3.org/2005/Atom'
    sheet.add('atom|title {color: #000000 !important}')
    sheet.add('@import "sheets/import.css";')

    # cssutils.ser.prefs.resolveVariables = True # default since 0.9.7b2
    # Parenthesized form prints the same text on Python 2 and is valid
    # syntax on Python 3.
    print(sheet.cssText)
Exemplo n.º 24
0
    def _css_parse(self, css):
        """
        Parse styling via cssutils modules.

        Builds a mapping from lower-cased selector text (with one leading
        ``#`` or ``.`` removed) to a dict of that rule's properties.  The
        ``color`` property is rewritten as ``#rrggbb`` from the red, green
        and blue components of a ``ColorValue``; every other property is
        copied unchanged.  Rules without properties are left out.

        :param css: CSS source text.
        :rtype: dict
        """
        sheet = parseString(css)
        style_sheet = {}

        for rule in sheet:
            # Iterating a sheet also yields comments and at-rules; those
            # have no selectorText and used to raise AttributeError here.
            if rule.type != rule.STYLE_RULE:
                continue

            selector = rule.selectorText.lower()
            if selector[0] in [u"#", u"."]:
                selector = selector[1:]

            # keep any style attributes that are needed
            new_style = {}
            for prop in rule.style:
                if prop.name == u"color":
                    cv = cssutils_css.ColorValue(prop.value)
                    # Code for RGB to hex conversion comes from
                    # http://bit.ly/1kwfBnQ
                    new_style[u"color"] = u"#%02x%02x%02x" % (cv.red, cv.green, cv.blue)
                else:
                    new_style[prop.name] = prop.value
            if new_style:
                style_sheet[selector] = new_style

        return style_sheet
Exemplo n.º 25
0
    def write(self, css_text=''):
        """ Output a human readable version of the css file in utf-8 format.

        **Notes:**

        - The file is human readable. It is not intended to be human editable as the file is auto-generated.
        - Pre-existing files with the same name are overwritten.
        - The serializer preferences are reset to their defaults before the text is produced.

        :type css_text: str

        :param css_text: Text containing the CSS to be written to the file.
        :return: None

        **Example:**

        >>> css_text = '.margin-top-50px { margin-top: 3.125em }'
        >>> css_file = CSSFile()
        >>> css_file.write(css_text=css_text)

        """
        # Local import keeps this method self-contained; io.open accepts an
        # explicit encoding on both Python 2 and Python 3.
        import io

        parse_string = parseString(css_text)
        ser.prefs.useDefaults()                # Enables Default / Verbose Mode
        file_path = get_file_path(file_directory=self.file_directory, file_name=self.file_name, extension='.css')
        # Write with an explicit encoding: the platform default is not
        # necessarily UTF-8, and non-ASCII text previously failed or was
        # stored in a different encoding than documented.
        with io.open(file_path, 'w', encoding='utf-8') as css_file:
            css_file.write(parse_string.cssText.decode('utf-8'))
Exemplo n.º 26
0
    def __get_field(self, field, cell):
        """Extract the value of ``field`` from a table ``cell``.

        :param field: a ``ProxyField`` member selecting the extraction rule.
        :param cell: element exposing ``getText``/``find``/``findAll``
            (looks like a BeautifulSoup tag -- confirm against the caller).
        :return: depending on ``field``:

            - ``LastUpdate``: ``self.requestTime`` plus the parsed
              hours/minutes/seconds offset;
            - ``IpAddress``: the cell text after removing the inline
              ``<style>`` element and every element whose class is hidden
              by it with ``display: none``;
            - ``Protocol`` / ``Anon``: the result of ``__match_enum``;
            - ``Speed`` / ``ConnectionTime``: the digits of the indicator's
              ``width`` percentage, as a string;
            - anything else: the stripped cell text.
        """
        cell_text = cell.getText().replace('\n', '').strip()

        if field is ProxyField.LastUpdate:
            # Parse time format [{0}h] [{1}min[s]] [{2}sec[s]]
            # NOTE(review): text not matching this format still raises
            # AttributeError on the None match -- confirm desired fallback.
            tex = re.search(r'^(?:(\d+)(?:h\s*))?(?:(\d+)\s*mins?\s*)?(?:(\d+)\s*secs?)?$', cell_text)
            hours, minutes, seconds = (
                int(part) if part is not None else 0 for part in tex.groups())
            return self.requestTime + datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)
        if field is ProxyField.IpAddress:
            # Inline style block used to hide junk elements.
            # Extract all classes with 'display: none' and remove them.
            style = cell.find("style")
            css = cssutils.parseString(style.getText())

            hidden_css_classes = []
            for css_rule in css.cssRules:
                # Comments and at-rules have no selector to inspect.
                if css_rule.type != css_rule.STYLE_RULE:
                    continue
                hides = any(prop.name == "display" and prop.value == "none"
                            for prop in css_rule.style)
                if not hides:
                    continue
                class_match = re.search(r"\.(.+)", css_rule.selectorText)
                # A hidden rule without a class selector names nothing to
                # remove; skip it instead of failing on a None match.
                if class_match is not None:
                    hidden_css_classes.append(class_match.group(1))

            # remove style tag, all hidden elements & rebuild cellText now all the hidden stuff is gone
            style.decompose()
            for css_class in hidden_css_classes:
                for tag in cell.findAll(attrs={'class': css_class}):
                    tag.decompose()
            return cell.getText().replace('\n', '').strip()
        elif field is ProxyField.Protocol:
            return self.__match_enum(ProxyProtocol, self.ProtocolPatterns, cell_text, None)
        elif field is ProxyField.Speed or field is ProxyField.ConnectionTime:
            indicator = cell.find("div", "indicator")
            return re.search(r"width:\s*(\d+)%;", indicator['style'], re.IGNORECASE).group(1)
        elif field is ProxyField.Anon:
            return self.__match_enum(ProxyAnon, self.AnonPatterns, cell_text, ProxyAnon.Low)
        else:
            return cell_text
Exemplo n.º 27
0
 def test_list(self):
     """PropertyValue[index]

     A property value can be indexed: a single colour function is item 0,
     and a font-family list exposes each family as its own item.
     """
     def property_value(css, name):
         # propertyValue of `name` in the first rule of the parsed sheet
         sheet = cssutils.parseString(css)
         return sheet.cssRules[0].style.getProperty(name).propertyValue

     # issue #41
     colour = property_value("""div.one {color: rgb(255, 0, 0);}   """, 'color')
     self.assertEqual(colour.value, 'rgb(255, 0, 0)')
     self.assertEqual(colour[0].value, 'rgb(255, 0, 0)')

     # issue #42
     families = property_value('body { font-family: "A", b, serif }',
                               'font-family')
     self.assertEqual(3, families.length)
     for index, family in enumerate(('A', 'b', 'serif')):
         self.assertEqual(families[index].value, family)
Exemplo n.º 28
0
    def _css_parse(self, css):
        """Parse ``css`` and keep only the style properties that are needed.

        Returns a dict mapping each lower-cased selector (with one leading
        ``#`` or ``.`` removed) to a dict holding the rule's ``text-align``,
        ``font-family``, ``font-size``, ``color`` and ``lang`` properties.
        ``color`` values are passed through ``_3digit_to_6digit_color``.
        Rules with none of these properties are left out.
        """
        # properties copied through unchanged
        copied = ('text-align', 'font-family', 'font-size', 'lang')

        # parse via cssutils modules
        sheet = parseString(css)
        style_sheet = {}

        for rule in sheet:
            # Iterating a sheet also yields comments and at-rules; those
            # have no selectorText and used to raise AttributeError here.
            if rule.type != rule.STYLE_RULE:
                continue

            selector = rule.selectorText.lower()
            if selector[0] in ['#', '.']:
                selector = selector[1:]

            # keep any style attributes that are needed
            new_style = {}
            for prop in rule.style:
                if prop.name in copied:
                    new_style[prop.name] = prop.value
                elif prop.name == 'color':
                    new_style['color'] = _3digit_to_6digit_color(prop.value)
            if new_style:
                style_sheet[selector] = new_style

        return style_sheet
Exemplo n.º 29
0
    def collect_global_css(self):
        """Map every spine item to the href of its global stylesheet.

        For each spine item the ``@page`` rule (with non-negative context
        margins applied) and the font-face rules are rendered to text.
        Items producing identical text share one generated manifest entry;
        items whose text is blank map to ``None``.
        """
        items_by_css = defaultdict(list)
        for item in self.oeb.spine:
            stylizer = self.stylizers[item]

            # Context margins override the page rule unless negative.
            top = float(self.context.margin_top)
            if top >= 0:
                stylizer.page_rule['margin-top'] = '%gpt' % top
            bottom = float(self.context.margin_bottom)
            if bottom >= 0:
                stylizer.page_rule['margin-bottom'] = '%gpt' % bottom

            page_items = sorted(stylizer.page_rule.items())
            if page_items:
                declarations = ';\n'.join(
                    "%s: %s" % pair for pair in page_items)
                page_css = '@page {\n%s\n}\n' % declarations
            else:
                page_css = ''

            font_css = '\n\n'.join(
                rule.cssText
                for rule in stylizer.font_face_rules + self.embed_font_rules)
            items_by_css[page_css + '\n\n' + font_css].append(item)

        # One manifest entry per distinct, non-blank stylesheet text.
        manifest = self.oeb.manifest
        href_by_css = {}
        for css in items_by_css:
            if css.strip():
                id_, href = manifest.generate('page_css', 'page_styles.css')
                sheet = cssutils.parseString(css, validate=False)
                manifest.add(id_, href, CSS_MIME, data=sheet)
            else:
                href = None
            href_by_css[css] = href

        return dict(
            (item, href_by_css[css])
            for css, items in items_by_css.iteritems()
            for item in items)
Exemplo n.º 30
0
def style_html(html_text, style_css):
    """Applies inline styles to an html_text string from an external file.

    Every style rule of the CSS file contributes its properties to each
    of its selectors (selector lists are split on ``", "``).  For each
    selector, every occurrence of ``<selector`` in the markup gets a
    ``style="name:value;..."`` attribute appended right after it.

    :param html_text: HTML markup to decorate.
    :param style_css: path of the CSS file to read.
    :return: the markup with inline ``style`` attributes added.
    """
    with open(style_css) as css_file:
        css = cssutils.parseString(css_file.read())

    styles = {}
    for rule in css:
        # Comments and at-rules have no selectorText; only style rules
        # can be inlined.
        if rule.type != rule.STYLE_RULE:
            continue
        for selector in rule.selectorText.split(", "):
            for prop in rule.style:
                styles.setdefault(selector, []).append(prop)

    styled_html = html_text
    for selector, prop_list in styles.items():
        declarations = "".join(
            "{}:{};".format(prop.name, prop.value) for prop in prop_list)
        inline = " style=\"" + declarations + "\""
        # The selector is literal text, not a pattern: escape it so that
        # characters such as "." or "*" are not read as regex syntax, and
        # use a function replacement so backslashes in the inserted text
        # are not treated as group references.
        pattern = regex.escape("<" + selector)
        styled_html = regex.sub(
            pattern,
            lambda match, inline=inline: match.group(0) + inline,
            styled_html)

    return styled_html
Exemplo n.º 31
0
def html_css_stylesheet():
    """Return the parsed ``templates/html.css`` stylesheet.

    The stylesheet is read and parsed (without validation) on the first call
    and cached in the module-level ``_html_css_stylesheet``; later calls
    return the cached object.
    """
    global _html_css_stylesheet
    if _html_css_stylesheet is None:
        # Read through a context manager so the file handle is closed
        # immediately instead of whenever the garbage collector gets to it.
        with open(P('templates/html.css'), 'rb') as css_file:
            html_css = css_file.read()
        _html_css_stylesheet = parseString(html_css, validate=False)
    return _html_css_stylesheet
Exemplo n.º 32
0
                                   list_font_basic_properties(c)[1],
                                   list_font_basic_properties(c)[2],
                                   list_font_basic_properties(c)[3]))
                        except (lib.fntutls.UnsupportedFont,
                                struct.error) as e:
                            print('%sERROR! Problem with font file "%s": %s' %
                                  (_file_dec, singlefile, e))

            if os.path.isdir(temp_font_dir):
                shutil.rmtree(temp_font_dir)
        elif singlefile.lower().endswith('.css'):
            with epubfile.open(singlefile) as f:
                cssutils.log.setLog(logging.getLogger(singlefile))
                cssutils.log.addHandler(streamhandler)
                cssutils.log.setLevel(logging.WARNING)
                cssutils.parseString(f.read(), validate=True)
            check_urls_in_css(singlefile, epubfile, prepnl, _file_dec)
            # TODO: not a real problem with file (make separate check for it)
            # is_body_family, is_font_face, ff, sfound\
            #     = check_body_font_family(
            #         singlefile, epubfile, _file_dec,
            #         is_body_family, is_font_face, ff, sfound
            #     )
        else:
            try:
                c = epubfile.read(singlefile)
                for key in entities.iterkeys():
                    c = c.replace(key, entities[key])
                sftree = etree.fromstring(c)
            except:
                sftree = None
Exemplo n.º 33
0
 def do():
     """Parse the CSS held in ``a`` and return (sheet, import-resolved sheet)."""
     parsed = cssutils.parseString(a)
     return parsed, cssutils.resolveImports(parsed)
Exemplo n.º 34
0
    def _update_fson_html_fields_and_screenshot_pending(self):
        """Rebuild the Fundraising Studio HTML fields of the email templates in ``self``.

        For each record that is not a version of another email template:

        * If ``fso_email_template``, ``fso_template_view_id`` and ``body_html``
          are all set, the theme view is rendered, script tags are optionally
          removed, print-field elements are replaced by their placeholder
          string, protocol-less anchors get an ``https://`` prefix, CSS is
          inlined with premailer, anchors are rewritten for the
          FRST-Multimailer, the properties of the remaining ``<style>`` rules
          get the ``IMPORTANT`` priority, and the result is written to
          ``fso_email_html`` / ``fso_email_html_parsed`` with
          ``screenshot_pending`` set.
        * Otherwise the HTML and screenshot fields are reset if any of them is
          currently set.

        Returns:
            The result of the first ``rec.write(...)`` that is reached, or
            ``None`` if no record triggers a write.

        NOTE(review): both ``rec.write`` calls are ``return`` statements inside
        the record loop, so records after the first written one are not
        processed -- confirm that this is intended for multi-record sets.
        """

        for rec in self:

            # Skip this for email_template versions
            if rec.version_of_email_id:
                continue

            # Update fields 'fso_email_html', 'fso_email_html_parsed' and 'screenshot_pending'
            logger.info("Update fields 'fso_email_html', 'fso_email_html_parsed' and 'screenshot_pending' "
                        "for email.template with id %s" % rec.id)

            # Only update fields if all needed fields are set
            if rec.fso_email_template and rec.fso_template_view_id and rec.body_html:

                # Render the related theme (ir.ui.view) to get the basic html content of the email body
                email_body = rec.fso_template_view_id.render({'html_sanitize': html_sanitize,
                                                              'email_editor_mode': False,
                                                              'record': rec,
                                                              'print_fields': self.env['fso.print_field'].search([]),
                                                              })

                # Convert html content to a beautiful soup object
                email_body_soup = BeautifulSoup(email_body, "lxml")

                # Remove all <script> tags if "no_java_script" is set in the email.template
                if rec.no_java_script:
                    email_body_soup_java_script_tags = email_body_soup.find_all("script")
                    for jst in email_body_soup_java_script_tags:
                        jst.decompose()

                # Replace print fields in e-mail body html with correct code for Fundraising Studio
                # HINT: http://beautiful-soup-4.readthedocs.io/en/latest/#output
                # HINT: Will auto-detect encoding and convert to unicode
                # HINT: 'class_' is used by html_soup because 'class' is a reserved keyword in python
                # NOTE(review): pf_class[0] / pf_span[0] raise IndexError when an element has no
                #               'pf_*' class or no matching descendant -- confirm the markup guarantees both.
                email_body_soup_print_fields = email_body_soup.find_all(class_="drop_in_print_field")
                for pf in email_body_soup_print_fields:
                    pf_class = [c for c in pf.get("class", []) if c.startswith("pf_")]
                    pf_span = pf.find_all(class_=pf_class[0])
                    fs_string = pf_span[0].get("data-fs-email-placeholder")
                    pf.replace_with(fs_string)

                # Repair anchors without protocol
                # E.g.: www.google.at > https://www.google.at
                # Hrefs that contain '://' or start with 'http', 'mailto', '/', '#' or '%' are left untouched
                email_body_soup_anchors = email_body_soup.find_all('a')
                for a in email_body_soup_anchors:
                    href = a.get('href', '').strip()
                    if '://' in href or any(href.startswith(x) for x in ('http', 'mailto', '/', '#', '%')):
                        continue
                    else:
                        a['href'] = 'https://' + href

                # Convert beautiful soup object to regular html
                # HINT: keep html entities like &nbsp; by using the formatter "html" instead of "minimal"
                # ATTENTION: Do NOT pretty print the document because it may lead to wrong spaces!
                # email_body_prepared = email_body_soup.prettify(formatter="html")
                email_body_prepared = email_body_soup.decode(pretty_print=False, formatter="html")

                # Use premailer to:
                #  - inline CSS and
                #  - convert relative to absolute URLs
                # HINT: This step must be done before generating multimailer links
                # ATTENTION: This step will try a lot of requests.packages.urllib3.connectionpool connections
                #            which may lead to long processing times.
                email_body_prepared_premailer = PremailerWithTimeout(email_body_prepared,
                                                                     method='xml',
                                                                     base_url=self.get_base_url(),
                                                                     preserve_internal_links=True,
                                                                     keep_style_tags=False,
                                                                     strip_important=True,
                                                                     align_floating_images=False,
                                                                     remove_unset_properties=True,
                                                                     include_star_selectors=False,
                                                                     cssutils_logging_handler=premailer_log_handler,
                                                                     cssutils_logging_level=logging.FATAL,)
                fso_email_html = email_body_prepared_premailer.transform(pretty_print=False)

                # Convert html content to a beautiful soup object again
                email_body_css_inline_soup = BeautifulSoup(fso_email_html, "lxml")

                # Replace anchors with FRST-Multimailer links
                email_body_css_inline_soup_anchors = email_body_css_inline_soup.find_all('a')
                for a in email_body_css_inline_soup_anchors:
                    href = a.get('href', '').strip()
                    # Handle and fix '%open_browser%' FRST-Multimailer links
                    if '%open_browser%' in href:
                        a['href'] = '%open_browser%'
                        continue
                    # Multimailer Token-Links: Add ?fs_ptoken=%xGuid% to token links for FRST
                    # ('&' is used instead of '?' when the href already has a query string)
                    if 'link-withtoken' in a.get('class', ''):
                        token_query = '&fs_ptoken=%xGuid%' if '?' in href else '?fs_ptoken=%xGuid%'
                        href = href+token_query
                        a['href'] = href
                        logger.info("TOKEN QUERY %s " % href)
                    # Skip rewrite to tracking link if 'link-donottrack' class is set
                    if 'link-donottrack' in a.get('class', ''):
                        continue
                    # Convert to FRST-Multimailer link
                    # The href is split at the first '://' and re-joined as '<protocol>//<address>'
                    # (without the colon) behind the '%redirector%/' prefix.
                    if '://' in href and href.startswith('http'):
                        protocol, address = href.split('://', 1)
                        a['href'] = '%redirector%/' + protocol + '//' + address

                def cycle_rules(rules):
                    """Set the priority 'IMPORTANT' on every property of every style rule.

                    Rules that are not style rules but have a ``cssRules`` attribute are
                    passed to this function again, i.e. they are iterated like a rule list.
                    """
                    for r in rules:
                        if r.type == r.STYLE_RULE:
                            for p in r.style:
                                p.priority = 'IMPORTANT'
                        elif hasattr(r, 'cssRules'):
                            cycle_rules(r)

                # Add !important to all media queries CSS in the header
                # HINT: Only the media queries will be in style tags in the header (the rest is inlined)
                # NOTE(review): styletag.string may be None for an empty or multi-child <style> tag --
                #               confirm cssutils.parseString copes with that input.
                for styletag in email_body_css_inline_soup.find_all('style'):
                    css = styletag.string
                    css_parsed = cssutils.parseString(css, validate=True)
                    cycle_rules(css_parsed)
                    styletag.string = css_parsed.cssText

                # Convert beautiful soup object back to regular html
                # ATTENTION: Do NOT pretty print the document because it may lead to wrong spaces!
                # fso_email_html_parsed = email_body_css_inline_soup.prettify(formatter="html")
                fso_email_html_parsed = email_body_css_inline_soup.decode(pretty_print=False, formatter="html")

                # Update the email.template fields
                # 'fso_email_html' is the premailer output, 'fso_email_html_parsed' additionally has the
                # rewritten anchors and the IMPORTANT priorities; the screenshot is cleared and marked pending.
                return rec.write({'fso_email_html': fso_email_html,
                                  'fso_email_html_parsed': fso_email_html_parsed,
                                  'screenshot': False,
                                  'screenshot_pending': True})

            # Make sure all fields are unset if any of the mandatory fields are missing
            else:
                # Only write when at least one of the fields currently holds a value
                if any(rec[f] for f in ['fso_email_html', 'fso_email_html_parsed', 'screenshot']):
                    return rec.write({'fso_email_html': False,
                                      'fso_email_html_parsed': False,
                                      'screenshot': False,
                                      'screenshot_pending': False})
Exemplo n.º 35
0
    def test_keepEmptyRules(self):
        "Preferences.keepEmptyRules"
        # CSSStyleRule
        css = u'''a {}
a {
    /*1*/
    }
a {
    color: red
    }'''
        s = cssutils.parseString(css)
        cssutils.ser.prefs.useDefaults()
        cssutils.ser.prefs.keepEmptyRules = True
        self.assertEqual(css, s.cssText)
        cssutils.ser.prefs.keepEmptyRules = False
        self.assertEqual(u'a {\n    /*1*/\n    }\na {\n    color: red\n    }',
                         s.cssText)
        cssutils.ser.prefs.keepComments = False
        self.assertEqual(u'a {\n    color: red\n    }', s.cssText)

        # CSSMediaRule
        css = u'''@media tv {
    }
@media all {
    /*1*/
    }
@media print {
    a {}
    }
@media print {
    a {
        /*1*/
        }
    }
@media all {
    a {
        color: red
        }
    }'''
        s = cssutils.parseString(css)
        cssutils.ser.prefs.useDefaults()
        cssutils.ser.prefs.keepEmptyRules = True
        #     self.assertEqual(css, s.cssText)
        cssutils.ser.prefs.keepEmptyRules = False
        self.assertEqual(
            '''@media all {
    /*1*/
    }
@media print {
    a {
        /*1*/
        }
    }
@media all {
    a {
        color: red
        }
    }''', s.cssText)
        cssutils.ser.prefs.keepComments = False
        self.assertEqual(
            '''@media all {
    a {
        color: red
        }
    }''', s.cssText)
    def test_parse(self):
        "CSSStyleDeclaration parse"
        # error but parse
        tests = {
            # property names are caseinsensitive
            'TOP:0':
            'top: 0',
            'top:0':
            'top: 0',
            # simple escape
            'c\\olor: red; color:green':
            'color: green',
            'color:g\\reen':
            'color: g\\reen',
            # http://www.w3.org/TR/2009/CR-CSS2-20090423/syndata.html#illegalvalues
            'color:green':
            'color: green',
            'color:green; color':
            'color: green',
            'color:red;   color; color:green':
            'color: green',
            'color:green; color:':
            'color: green',
            'color:red;   color:; color:green':
            'color: green',
            'color:green; color{;color:maroon}':
            'color: green',
            'color:red; color{;color:maroon}; color:green':
            'color: green',
            # tantek hack
            r'''color: red;
voice-family: "\"}\"";
voice-family:inherit;
color: green;''':
            'voice-family: inherit;\ncolor: green',
            r'''col\or: blue;
                font-family: 'Courier New Times
                color: red;
                color: green;''':
            'color: green',
            # special IE hacks are not preserved anymore (>=0.9.5b3)
            '/color: red; color: green':
            'color: green',
            '/ color: red; color: green':
            'color: green',
            '1px: red; color: green':
            'color: green',
            '0: red; color: green':
            'color: green',
            '1px:: red; color: green':
            'color: green',
            r'$top: 0':
            '',
            r'$: 0':
            '',  # really invalid!
            # unknown rule but valid
            '@x;\ncolor: red':
            None,
            '@x {\n    }\ncolor: red':
            None,
            '/**/\ncolor: red':
            None,
            '/**/\ncolor: red;\n/**/':
            None,
            # issue #28
            ';color: red':
            'color: red',
            ';;color: red;;':
            'color: red',
        }
        cssutils.ser.prefs.keepAllProperties = False
        for test, exp in list(tests.items()):
            sh = cssutils.parseString('a { %s }' % test)
            if exp is None:
                exp = '%s' % test
            elif exp != '':
                exp = '%s' % exp
            self.assertEqual(exp, sh.cssRules[0].style.cssText)

        cssutils.ser.prefs.useDefaults()
Exemplo n.º 37
0
class CSSStyleRuleTestCase(test_cssrule.CSSRuleTestCase):
    """Tests for ``cssutils.css.CSSStyleRule`` on top of the generic rule tests."""

    def setUp(self):
        super(CSSStyleRuleTestCase, self).setUp()
        # fixtures read by the inherited CSSRuleTestCase tests
        self.r = cssutils.css.CSSStyleRule()
        self.rRO = cssutils.css.CSSStyleRule(readonly=True)
        self.r_type = cssutils.css.CSSStyleRule.STYLE_RULE
        self.r_typeString = 'STYLE_RULE'

    def test_init(self):
        "CSSStyleRule.type and init"
        super(CSSStyleRuleTestCase, self).test_init()
        self.assertEqual(u'', self.r.cssText)
        self.assertEqual(cssutils.css.selectorlist.SelectorList,
                         type(self.r.selectorList))
        self.assertEqual(u'', self.r.selectorText)
        self.assertEqual(cssutils.css.CSSStyleDeclaration, type(self.r.style))
        self.assertEqual(self.r, self.r.style.parentRule)

    def test_refs(self):
        "CSSStyleRule references"
        s = cssutils.css.CSSStyleRule()
        sel, style = s.selectorList, s.style

        self.assertEqual(s, sel.parentRule)
        self.assertEqual(s, style.parentRule)

        # a successful cssText assignment replaces both child objects
        s.cssText = 'a { x:1 }'
        self.assertNotEqual(sel, s.selectorList)
        self.assertEqual('a', s.selectorList.selectorText)
        self.assertNotEqual(style, s.style)
        self.assertEqual('1', s.style.getPropertyValue('x'))

        sel, style = s.selectorList, s.style

        # a failed cssText assignment must leave both child objects untouched
        invalids = (
            '$b { x:2 }',  # invalid selector
            'c { $x3 }',  # invalid style
            '/b { 2 }'  # both invalid
        )
        for invalid in invalids:
            try:
                s.cssText = invalid
            except xml.dom.DOMException:
                # expected; ``except X:`` is valid on Python 2 and 3, unlike
                # the former ``except X, e:`` form
                pass
            self.assertEqual(sel, s.selectorList)
            self.assertEqual(u'a', s.selectorList.selectorText)
            self.assertEqual(style, s.style)
            self.assertEqual(u'1', s.style.getPropertyValue('x'))

        # CHANGING
        s = cssutils.parseString(u'a {s1: 1}')
        r = s.cssRules[0]
        sel1 = r.selectorList
        st1 = r.style

        # selectorList
        r.selectorText = 'b'
        self.assertNotEqual(sel1, r.selectorList)
        self.assertEqual('b', r.selectorList.selectorText)
        self.assertEqual('b', r.selectorText)
        sel1b = r.selectorList

        sel1b.selectorText = 'c'
        self.assertEqual(sel1b, r.selectorList)
        self.assertEqual('c', r.selectorList.selectorText)
        self.assertEqual('c', r.selectorText)

        # NOTE(review): ``s`` is the stylesheet returned by parseString here,
        # not the rule ``r``; the assignments below therefore go to the sheet
        # object -- confirm whether ``r.selectorList`` was meant.
        sel2 = cssutils.css.SelectorList('sel2')
        s.selectorList = sel2
        self.assertEqual(sel2, s.selectorList)
        self.assertEqual('sel2', s.selectorList.selectorText)

        sel2.selectorText = 'sel2b'
        self.assertEqual('sel2b', sel2.selectorText)
        self.assertEqual('sel2b', s.selectorList.selectorText)

        s.selectorList.selectorText = 'sel2c'
        self.assertEqual('sel2c', sel2.selectorText)
        self.assertEqual('sel2c', s.selectorList.selectorText)

        # style
        r.style = 's1: 2'
        self.assertNotEqual(st1, r.style)
        self.assertEqual('s1: 2', r.style.cssText)

        st2 = cssutils.parseStyle(u's2: 1')
        r.style = st2
        self.assertEqual(st2, r.style)
        self.assertEqual('s2: 1', r.style.cssText)

        # cssText
        sl, st = r.selectorList, r.style
        # fails
        try:
            r.cssText = '$ {content: "new"}'
        except xml.dom.SyntaxErr:
            pass
Exemplo n.º 38
0
def getView(document, css):
    """Compute the cascaded style of every element matched by a stylesheet.

    document
        a DOM document, currently an lxml HTML document
    css
        a CSS StyleSheet string

    returns style view
        a dict of {DOMElement: css.CSSStyleDeclaration} for html

    Only style rules are evaluated (no @media).  Selectors that cannot be
    compiled by ``lxml.cssselect`` are skipped.
    """
    from lxml.cssselect import CSSSelector
    sheet = cssutils.parseString(css)

    view = {}
    specificities = {}  # needed temporarily

    # TODO: filter rules simpler?, add @media
    rules = (rule for rule in sheet if rule.type == rule.STYLE_RULE)
    for rule in rules:
        for selector in rule.selectorList:
            # TODO: make this a callback to be able to use other stuff than lxml
            try:
                cssselector = CSSSelector(selector.selectorText)
            except Exception:
                # Best effort: ignore selectors lxml cannot handle, but do not
                # swallow KeyboardInterrupt/SystemExit like a bare except.
                continue
            matching = cssselector.evaluate(document)

            for element in matching:
                # add styles for all matching DOM elements
                if element not in view:
                    # add initial empty style declaration
                    view[element] = cssutils.css.CSSStyleDeclaration(
                    )  # @UndefinedVariable
                    specificities[element] = {}

                for p in rule.style:
                    # update style declaration
                    if p not in view[element]:
                        # setProperty needs a new Property object and
                        # MUST NOT reuse the existing Property
                        # which would be the same for all elements!
                        # see Issue #23
                        view[element].setProperty(p.name, p.value, p.priority)
                        specificities[element][p.name] = selector.specificity

                    else:
                        sameprio = (
                            p.priority == view[element].getPropertyPriority(
                                p.name))
                        # Override when the new declaration has a priority
                        # the stored one lacks, or when priorities are equal
                        # and the selector is at least as specific.
                        if not sameprio and bool(p.priority) or (
                                sameprio and selector.specificity >=
                                specificities[element][p.name]):
                            # later, more specific or higher prio
                            view[element].setProperty(p.name, p.value,
                                                      p.priority)

    return view
Exemplo n.º 39
0
 def adddata(self):
     """Locate the style element and count the CSS rules it contains.

     Sets ``self.style_element`` to the element found under the path built
     from the tags of the root's first child and that child's first child,
     and ``self.number_of_classes`` to the number of rules parsed from the
     element's text.
     """
     first_child = self.root[0]
     style_path = './' + first_child.tag + '/' + first_child[0].tag
     self.style_element = self.root.find(style_path)
     parsed_sheet = cssutils.parseString(self.style_element.text)
     self.number_of_classes = parsed_sheet.cssRules.length
Exemplo n.º 40
0
    def check_css(self, myfile):
        """Find unused CSS selectors and classes used without a CSS rule.

        The stylesheet is taken from the first ``<style>`` element in the
        ``<head>`` of ``myfile.tree``; nothing happens when there is none.

        Results are stored on the instance:

        * ``cssutils_errors``: non-empty log lines emitted while parsing
        * ``sel_unchecked``: selectors ``CSSSelector`` could not compile
        * ``sel_unused``: selectors that match no element
        * ``classes_undefined``: ``[sourceline, class]`` pairs for classes
          that no matching selector marked as used

        Matched elements receive a ``__used_classes`` attribute as a marker.
        """

        # Fails on a few corner cases, such as
        #  ".tdm > tbody > tr > td:first-child + td"
        #  ".onetable td"
        #
        # Ignores @media

        # Find the CSS style
        css_selectors = []
        css = myfile.tree.find('head').find('style')

        if css is None:
            return

        # The CSS can be in a comment or not
        if len(css):
            # Not sure whether that covers all the comment cases. Maybe add
            # all the children
            css_string = etree.tostring(css[0])
        else:
            css_string = css.text

        # Parse the CSS and retrieve the errors
        mylog = io.StringIO()
        h = logging.StreamHandler(mylog)
        h.setFormatter(logging.Formatter('%(levelname)s %(message)s'))
        cssutils.log.addHandler(h)
        cssutils.log.setLevel(logging.INFO)
        try:
            css_sheet = cssutils.parseString(css_string)
        finally:
            # cssutils.log is global: detach the handler again, otherwise
            # every call stacks one more handler (and buffer) onto it.
            cssutils.log.removeHandler(h)

        self.cssutils_errors = [x for x in mylog.getvalue().splitlines() if x]

        for rule in css_sheet:

            # We don't want comments, media, ...
            if rule.type != cssutils.css.CSSRule.STYLE_RULE:
                continue

            # One entry per selector of a comma-separated selector group
            for part in rule.selectorText.split(','):
                css_selectors.append(part.strip())

        # Find the unused/undefined CSS. It is possible 2 rules will
        # match the same class (for instance "p.foo" and ".foo" will
        # match "class=foo"). That is not detected, and both rules
        # will be valid.
        self.sel_unchecked = []
        self.sel_unused = []
        for selector in css_selectors:

            # Get the selector (eg. "body", "p", ".some_class")
            try:
                sel = CSSSelector(selector)
            except Exception:
                # Selector not supported by CSSSelector: report it instead
                self.sel_unchecked.append(selector)
                continue

            # Retrieve where it is used in the xhtml
            occurences = sel(myfile.tree)

            if len(occurences) == 0:
                self.sel_unused.append(sel.css)
                continue

            # If it's from a class, find the name. It should be the
            # last word starting with a dot (eg. "p.foo", ".foo",
            # "#toc .foo" => "foo")
            m = re.match(r'^.*\.([\w-]+)$', sel.css)

            if m is None:
                continue

            cl = m.group(1)
            assert len(cl) > 0

            # Mark the class wherever it is used, in each element
            for item in occurences:
                if 'class' in item.attrib:
                    # split() without argument also copes with tabs,
                    # newlines and repeated blanks between class names
                    old_classes = item.attrib['class'].split()

                    found = 0
                    for it_cl in old_classes:
                        if it_cl == cl:
                            # The class has been found. Mark it.
                            found = 1
                            if '__used_classes' not in item.attrib:
                                item.attrib['__used_classes'] = it_cl
                            else:
                                item.attrib['__used_classes'] += ' ' + it_cl

                    assert found == 1

                else:
                    # I don't think that should happen
                    print("KPPVH ERROR - matches " + cl + " but no class - line " + str(item.sourceline))

        # Look for unused classes
        self.classes_undefined = []
        # todo- use xpath instead
        for element in myfile.tree.iter(tag=etree.Element):
            if 'class' in element.attrib:
                classes = element.attrib['class'].strip()
                classes = re.sub(r"\s+", " ", classes)
                classes = classes.split(' ')

                if '__used_classes' in element.attrib:
                    used_classes = element.attrib['__used_classes'].strip().split(' ')

                    # Subtract content of used_classes from classes
                    classes = list(set(classes) - set(used_classes))

                # Finally, record the warning
                for cl in classes:
                    self.classes_undefined.append([element.sourceline, cl])
Exemplo n.º 41
0
    def getView(self, document, css):
        """Build the cascaded style view and collect client-support statistics.

        Args:
            document: document passed to each compiled ``CSSSelector``.
            css: stylesheet text.

        Returns:
            dict mapping each matched element to a
            ``cssutils.css.CSSStyleDeclaration`` holding its inline style
            plus the cascaded rule properties.

        Side effects on the instance:
            * ``CSSUnsupportErrors[property]``: clients listed with "N" or
              "P" (partial support) in ``css_compliance.csv``
            * ``CSSErrors``: messages of selectors raising ``ExpressionError``
            * ``supportPercentage``: set when both rate totals are non-zero
        """

        view = {}
        specificities = {}
        supportratios = {}
        supportFailRate = 0
        supportTotalRate = 0
        compliance = dict()

        # load CSV containing css property client support into dict
        # (opened in a with-block so the file handle is closed again)
        clientCount = 0  # stays 0 when the CSV has no data rows
        csv_path = os.path.join(os.path.dirname(__file__),
                                "css_compliance.csv")
        with open(csv_path) as csv_file:
            for row in csv.DictReader(csv_file, delimiter=','):
                # count clients so we can calculate an overall support
                # percentage later
                clientCount = len(row)
                compliance[row['property'].strip()] = dict(row)

        # decrement client count to account for first col which is property
        # name (never below zero for an empty CSV)
        clientCount = max(clientCount - 1, 0)

        sheet = cssutils.parseString(css)

        rules = (rule for rule in sheet if rule.type == rule.STYLE_RULE)
        for rule in rules:

            for selector in rule.selectorList:
                try:
                    cssselector = CSSSelector(selector.selectorText)
                    matching = cssselector.evaluate(document)

                    for element in matching:
                        # add styles for all matching DOM elements
                        if element not in view:
                            # add initial
                            view[element] = cssutils.css.CSSStyleDeclaration()
                            specificities[element] = {}

                            # add inline style if present
                            inlinestyletext = element.get('style')
                            if inlinestyletext:
                                inlinestyle = cssutils.css.CSSStyleDeclaration(
                                    cssText=inlinestyletext)
                            else:
                                inlinestyle = None
                            if inlinestyle:
                                for p in inlinestyle:
                                    # set inline style specificity
                                    view[element].setProperty(p)
                                    specificities[element][p.name] = (1, 0, 0,
                                                                      0)

                        for p in rule.style:
                            # create supportratio dict item for this property
                            if p.name not in supportratios:
                                supportratios[p.name] = {
                                    'usage': 0,
                                    'failedClients': 0
                                }
                            # increment usage
                            supportratios[p.name]['usage'] += 1

                            try:
                                # Clients are collected only while the
                                # property has no error entry yet.
                                if p.name not in self.CSSUnsupportErrors:
                                    for client, support in compliance[
                                            p.name].items():
                                        if support not in ("N", "P"):
                                            continue
                                        # increment client failure count for
                                        # this property
                                        supportratios[
                                            p.name]['failedClients'] += 1
                                        if support == "P":
                                            label = client + ' (partial support)'
                                        else:
                                            label = client
                                        if p.name not in self.CSSUnsupportErrors:
                                            self.CSSUnsupportErrors[
                                                p.name] = [label]
                                        else:
                                            self.CSSUnsupportErrors[
                                                p.name].append(label)

                            except KeyError:
                                # property not listed in the compliance table
                                pass

                            # update styles
                            if p not in view[element]:
                                view[element].setProperty(
                                    p.name, p.value, p.priority)
                                specificities[element][
                                    p.name] = selector.specificity
                            else:
                                sameprio = (p.priority ==
                                            view[element].getPropertyPriority(
                                                p.name))
                                if not sameprio and bool(p.priority) or (
                                        sameprio and selector.specificity >=
                                        specificities[element][p.name]):
                                    # later, more specific or higher prio
                                    view[element].setProperty(
                                        p.name, p.value, p.priority)

                except ExpressionError as err:
                    # remember each distinct selector error once
                    message = str(err)
                    if message not in self.CSSErrors:
                        self.CSSErrors.append(message)

        for propvals in supportratios.values():
            supportFailRate += (propvals['usage']) * int(
                propvals['failedClients'])
            supportTotalRate += int(propvals['usage']) * clientCount

        if (supportFailRate and supportTotalRate):
            self.supportPercentage = 100 - (
                (float(supportFailRate) / float(supportTotalRate)) * 100)
        return view
Exemplo n.º 42
0
 def test_reader(cls) -> None:
     """Build a ``cls`` instance from the style rules of ``test_css.css``.

     For every style rule the attribute selectors, pseudo classes/elements
     and ``*`` are stripped from the selector text, the rest is split at
     commas, and each non-empty selector is loaded together with the rule's
     style via ``CSSRuleBlock._load_rule``.

     NOTE(review): the ``-> None`` annotation does not match the returned
     ``cls(...)`` value; it is left unchanged here.
     """
     # Read through a context manager so the file handle is closed again.
     with open('test_css.css') as css_file:
         sheet = cssutils.parseString(css_file.read())
     # Raw strings: "\s", "\[", "\w" ... are invalid escapes in normal literals.
     strip_pattern = r'\[\w+\=[^\]]+\]|:+[\w\-]+|\s+\*|\*'
     _rules = [(re.split(r',\s+|,', re.sub(strip_pattern, '', i.selectorText)), i.style)
               for i in sheet.cssRules
               if isinstance(i, cssutils.css.CSSStyleRule)]
     return cls([CSSRuleBlock._load_rule(i, a) for b, a in _rules for i in b if i])
Exemplo n.º 43
0
 def show_urls(s, data):
     """Print every URL referenced in the CSS text ``s``, one per line.

     ``data`` is accepted but not used.
     """
     stylesheet = cssutils.parseString(s)  # parseFile (f)  # its a start :)
     for u in cssutils.getUrls(stylesheet):
         # print() with one argument behaves the same on Python 2 and 3
         print(u)
Exemplo n.º 44
0
import cssutils
import logging
# Silence everything below FATAL from the cssutils logger for this demo.
cssutils.log.setLevel(logging.FATAL)

# Sample sheet declaring "color" several times, including escaped spellings
# of the property name and differing priorities.
css = '''@import "example.css"; 
a {
    color: blue !important;
    c\olor: green !important;
    c\olor: pink;
    color: red;
    }'''

sheet = cssutils.parseString(css)
# Single-argument print(...) calls produce the same output on Python 2 and 3.
print("\nORIGINAL CSS:")
print(css)
print("------------")

# Show the serializer preferences currently in effect.
print(repr(cssutils.ser.prefs))

print("\nCSS Serialized")
print(sheet.cssText)

# Re-serialize the same sheet after toggling individual preferences.
print("\nCSS Serialized with ``keepAllProperties`` = False")
cssutils.ser.prefs.keepAllProperties = False
print(sheet.cssText)

print("\nCSS Serialized with ``defaultPropertyName`` = True")
cssutils.ser.prefs.defaultPropertyName = True
print(sheet.cssText)

print("\nCSS Serialized with ``defaultPropertyName`` = False")
Exemplo n.º 45
0
    def test_cssRules(self):
        """CSSPageRule.cssRules

        Exercises adding, inserting, looking up, replacing and deleting
        margin rules on a parsed ``@page`` rule.
        """
        sheet = cssutils.parseString('@page {}')
        page = sheet.cssRules[0]

        self.assertEqual(len(page.cssRules), 0)

        # add() and insertRule() both report the index the rule landed at
        top_left = cssutils.css.MarginRule('@top-left', 'color: red')
        self.assertEqual(page.add(top_left), 0)
        self.assertEqual(len(page.cssRules), 1)

        top_right = cssutils.css.MarginRule()
        top_right.cssText = '@top-right { color: blue }'
        self.assertEqual(page.insertRule(top_right), 1)
        self.assertEqual(len(page.cssRules), 2)

        top_center = cssutils.css.MarginRule()
        top_center.margin = '@top-center'
        top_center.style = 'color: green'
        self.assertEqual(page.insertRule(top_center, 1), 1)
        self.assertEqual(len(page.cssRules), 3)

        all_three = '''@page {
    @top-left {
        color: red
        }
    @top-center {
        color: green
        }
    @top-right {
        color: blue
        }
    }'''
        self.assertEqual(page.cssText, all_three)

        # membership test, keys() and item access by margin name
        self.assertEqual('@top-left' in page, True)
        self.assertEqual('@bottom-left' in page, False)

        expected_keys = ['@top-left', '@top-center', '@top-right']
        self.assertEqual(list(page.keys()), expected_keys)

        self.assertEqual(page['@bottom-left'], None)
        self.assertEqual(page['@top-left'].cssText, 'color: red')
        page['@top-left'] = 'color: #f00'
        self.assertEqual(page['@top-left'].cssText, 'color: #f00')

        # deleteRule() accepts a rule object ...
        page.deleteRule(top_center)
        self.assertEqual(len(page.cssRules), 2)
        without_center = '''@page {
    @top-left {
        color: #f00
        }
    @top-right {
        color: blue
        }
    }'''
        self.assertEqual(page.cssText, without_center)

        # ... or an index
        page.deleteRule(0)
        self.assertEqual(len(page.cssRules), 1)
        self.assertEqual(top_right, page.cssRules[0])
        only_right = '''@page {
    @top-right {
        color: blue
        }
    }'''
        self.assertEqual(page.cssText, only_right)

        # ``del`` by margin name removes the last remaining rule
        del page['@top-right']
        self.assertEqual(len(page.cssRules), 0)
Exemplo n.º 46
0
 def test_prioriy(self):
     """Property.priority

     Parses a single-declaration rule and compares the sheet's ``cssText``
     with the expected encoded serialization.
     """
     expected = 'a {\n    color: red\n    }'.encode()
     sheet = cssutils.parseString('a { color: red }')
     self.assertEqual(sheet.cssText, expected)
Exemplo n.º 47
0
def _content_length(target_url):
    """Return the ``Content-Length`` header of *target_url* (None if absent)."""
    # Close the response as soon as the headers have been read.
    with request.urlopen(target_url) as response:
        return response.info().get("Content-Length")


def web_crawler(url, depth=0, page_assets=False):
    """Crawl *url*, printing the reported size of each asset and link found.

    Scripts, stylesheets, images and anchors of the page are visited.  New
    URLs are recorded in the module-level ``links`` / ``assets`` lists, and
    anchors (plus scripts and stylesheets when *page_assets* is true) are
    crawled recursively with ``depth - 1``.

    Args:
        url: Address of the page to fetch.
        depth: Remaining recursion budget; nothing happens when negative.
        page_assets: When true, also parse the response as CSS to collect
            referenced URLs, and recurse into script/stylesheet URLs.
    """
    if depth < 0:
        return

    opener = request.build_opener()
    # OpenerDirector reads ``addheaders`` and expects (name, value) tuples;
    # the former ``add_headers`` list of dicts was silently ignored.
    opener.addheaders = [('User-Agent', 'Mozilla')]
    request.install_opener(opener)

    if url not in links:
        links.append(url)

    # Fetch the page once and reuse the response for both HTML and CSS parsing.
    response = requests.get(url)
    raw = response.text

    if page_assets:
        try:
            sheet = cssutils.parseString(response.content)
            # A distinct loop name keeps ``url`` intact: it is the base for
            # every urljoin() call further down.
            for asset_url in cssutils.getUrls(sheet):
                if asset_url not in links:
                    links.append(asset_url)
                    print(asset_url, ' size: ', _content_length(asset_url))
        except Exception:
            # Best effort: a page that is not CSS or an unreachable asset
            # must not stop the crawl.
            pass

    soup = bs(raw, 'html.parser')

    for script in soup.find_all("script"):
        src = script.attrs.get("src")
        if not src:
            continue
        script_url = parse.urljoin(url, src)
        if script_url not in assets:
            print(script_url, ' size: ', _content_length(script_url))
            assets.append(script_url)
            if page_assets and script_url not in links:
                links.append(script_url)
                web_crawler(script_url, depth - 1, page_assets)

    for css in soup.find_all("link", {"rel": "stylesheet"}):
        css_href = css.attrs.get("href")
        if not css_href:
            continue
        css_url = parse.urljoin(url, css_href)
        if css_url not in assets:
            try:
                print(css_url, ' ', 'size: ', _content_length(css_url))
                assets.append(css_url)
                if page_assets and css_url not in links:
                    links.append(css_url)
                    web_crawler(css_url, depth - 1, page_assets)
            except Exception:
                # Best effort: skip stylesheets that cannot be fetched.
                pass

    for img in soup.find_all("img"):
        img_src = img.get("src")
        if not img_src:
            continue
        img_url = parse.urljoin(url, img_src)
        # Check first so already-known images are not downloaded again.
        if img_url in assets:
            continue
        try:
            print(img_url, ' ', 'size: ', _content_length(img_url))
            assets.append(img_url)
        except Exception:
            # Best effort: skip images that cannot be fetched.
            pass

    for a in soup.find_all('a'):
        href = a.get('href')
        if not href:
            # Anchors without an href used to become the literal string 'None'.
            continue
        # urljoin resolves root-relative and relative links against the page
        # and leaves absolute URLs untouched.
        href = parse.urljoin(url, href)

        if href not in links:
            print(href, ' ', 'size: ', _content_length(href))

            links.append(href)
            web_crawler(href, depth - 1, page_assets)
Exemplo n.º 48
0
    def test_useDefaults(self):
        "Preferences.useDefaults()"
        cssutils.ser.prefs.useMinified()
        cssutils.ser.prefs.useDefaults()
        self.assertEqual(cssutils.ser.prefs.defaultAtKeyword, True)
        self.assertEqual(cssutils.ser.prefs.defaultPropertyName, True)
        self.assertEqual(cssutils.ser.prefs.defaultPropertyPriority, True)
        self.assertEqual(cssutils.ser.prefs.importHrefFormat, None)
        self.assertEqual(cssutils.ser.prefs.indent, 4 * ' ')
        self.assertEqual(cssutils.ser.prefs.indentClosingBrace, True)
        self.assertEqual(cssutils.ser.prefs.keepAllProperties, True)
        self.assertEqual(cssutils.ser.prefs.keepComments, True)
        self.assertEqual(cssutils.ser.prefs.keepEmptyRules, False)
        self.assertEqual(cssutils.ser.prefs.keepUnknownAtRules, True)
        self.assertEqual(cssutils.ser.prefs.keepUsedNamespaceRulesOnly, False)
        self.assertEqual(cssutils.ser.prefs.lineNumbers, False)
        self.assertEqual(cssutils.ser.prefs.lineSeparator, '\n')
        self.assertEqual(cssutils.ser.prefs.listItemSpacer, ' ')
        self.assertEqual(cssutils.ser.prefs.omitLastSemicolon, True)
        self.assertEqual(cssutils.ser.prefs.omitLeadingZero, False)
        self.assertEqual(cssutils.ser.prefs.paranthesisSpacer, ' ')
        self.assertEqual(cssutils.ser.prefs.propertyNameSpacer, ' ')
        self.assertEqual(cssutils.ser.prefs.selectorCombinatorSpacer, ' ')
        self.assertEqual(cssutils.ser.prefs.spacer, ' ')
        self.assertEqual(cssutils.ser.prefs.validOnly, False)
        css = '''
    /*1*/
    @import url(x) tv , print;
    @namespace prefix "uri";
    @namespace unused "unused";
    @media all {}
    @media all {
        a {}
    }
    @media   all  {
    a { color: red; }
        }
    @page     { left: 0; }
    a {}
    prefix|x, a  +  b  >  c  ~  d  ,  b { top : 1px ;
        font-family : arial ,'some'
        }
    '''
        parsedcss = '''/*1*/
@import url(x) tv, print;
@namespace prefix "uri";
@namespace unused "unused";
@media all {
    a {
        color: red
        }
    }
@page {
    left: 0
    }
prefix|x, a + b > c ~ d, b {
    top: 1px;
    font-family: arial, "some"
    }'''
        s = cssutils.parseString(css)
        self.assertEqual(s.cssText, parsedcss.encode())
        
        tests = {
            '0.1 .1 0.1px .1px 0.1% .1% +0.1 +.1 +0.1px +.1px +0.1% +.1% -0.1 -.1 -0.1px -.1px -0.1% -.1%': 
            '0.1 0.1 0.1px 0.1px 0.1% 0.1% +0.1 +0.1 +0.1px +0.1px +0.1% +0.1% -0.1 -0.1 -0.1px -0.1px -0.1% -0.1%' 
        }
        cssutils.ser.prefs.useDefaults()
        for test, exp in list(tests.items()):
            s = cssutils.parseString('a{x:%s}' % test)
            self.assertEqual(('a {\n    x: %s\n    }' % exp).encode(), s.cssText)
Exemplo n.º 49
0
    def __init__(self, tree, path, oeb, opts, profile=None,
            extra_css='', user_css='', base_css=''):
        """Collect all CSS that applies to *tree* and compute per-element styles.

        Stylesheets are gathered, in order, from ``html_css_stylesheet()``,
        *base_css*, the document's ``<style>`` and ``<link>`` elements, and
        finally *extra_css* / *user_css*.  Their rules are flattened, sorted
        and applied to every element matched by each selector; ``style``
        attributes and ``<img>`` ``width``/``height`` attributes are folded in
        afterwards.

        :param tree: parsed document; queried with ``xpath`` and ``Select``.
        :param path: key into ``oeb.manifest.hrefs`` for the document item.
        :param oeb: container providing ``manifest``, ``logger`` and
            ``css_preprocessor``.
        :param opts: options object; ``output_profile`` is read from it.
        :param profile: profile to use; when ``None`` the output profile whose
            ``short_name`` is ``'default'`` is looked up, falling back to
            ``opts.output_profile``.
        :param extra_css: additional CSS text, parsed when non-empty.
        :param user_css: additional CSS text, parsed when non-empty.
        :param base_css: CSS text parsed right after the built-in sheet.
        """
        self.oeb, self.opts = oeb, opts
        self.profile = profile
        if self.profile is None:
            # Use the default profile. This should really be using
            # opts.output_profile, but I don't want to risk changing it, as
            # doing so might well have hard to debug font size effects.
            from calibre.customize.ui import output_profiles
            for x in output_profiles():
                if x.short_name == 'default':
                    self.profile = x
                    break
        if self.profile is None:
            # Just in case the default profile is removed in the future :)
            self.profile = opts.output_profile
        self.body_font_size = self.profile.fbase
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
        # Name used as ``href`` for sheets parsed from inline CSS text.
        cssname = os.path.splitext(basename)[0] + '.css'
        # The first entry (index 0) is treated as the user-agent sheet when
        # the rules are flattened below.
        stylesheets = [html_css_stylesheet()]
        if base_css:
            stylesheets.append(parseString(base_css, validate=False))
        style_tags = xpath(tree, '//*[local-name()="style" or local-name()="link"]')

        # Add cssutils parsing profiles from output_profile
        # NOTE: the loop variable below shadows the ``profile`` parameter,
        # which has already been copied to ``self.profile``.
        for profile in self.opts.output_profile.extra_css_modules:
            cssprofiles.addProfile(profile['name'],
                                        profile['props'],
                                        profile['macros'])

        parser = CSSParser(fetcher=self._fetch_css_file,
                log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in style_tags:
            if (elem.tag == XHTML('style') and
                elem.get('type', CSS_MIME) in OEB_STYLES):
                # Concatenate the element's own text with the text and tail
                # of each of its children.
                text = elem.text if elem.text else u''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                if text:
                    text = oeb.css_preprocessor(text)
                    # We handle @import rules separately
                    # A fetcher returning empty content is installed for this
                    # parse and the real one is restored right after.
                    parser.setFetcher(lambda x: ('utf-8', b''))
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    parser.setFetcher(self._fetch_css_file)
                    for rule in stylesheet.cssRules:
                        if rule.type == rule.IMPORT_RULE:
                            ihref = item.abshref(rule.href)
                            # Imports whose media text is exactly 'amzn-mobi'
                            # are skipped.
                            if rule.media.mediaText == 'amzn-mobi':
                                continue
                            hrefs = self.oeb.manifest.hrefs
                            if ihref not in hrefs:
                                self.logger.warn('Ignoring missing stylesheet in @import rule:', rule.href)
                                continue
                            sitem = hrefs[ihref]
                            if sitem.media_type not in OEB_STYLES:
                                self.logger.warn('CSS @import of non-CSS file %r' % rule.href)
                                continue
                            # The imported sheet is added before the sheet
                            # that imports it.
                            stylesheets.append(sitem.data)
                    # Drop @page rules; iterate over a tuple copy because the
                    # rule list is modified inside the loop.
                    for rule in tuple(stylesheet.cssRules.rulesOfType(CSSRule.PAGE_RULE)):
                        stylesheet.cssRules.remove(rule)
                    # Make links to resources absolute, since these rules will
                    # be folded into a stylesheet at the root
                    replaceUrls(stylesheet, item.abshref,
                            ignoreImportRules=True)
                    stylesheets.append(stylesheet)
            # <link> elements: use the already-parsed stylesheet data stored
            # on the referenced manifest item.
            elif elem.tag == XHTML('link') and elem.get('href') \
                 and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                 and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
                href = urlnormalize(elem.attrib['href'])
                # NOTE: rebinds the ``path`` parameter to the linked sheet's
                # absolute href.
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r not in manifest' %
                        (path, item.href))
                    continue
                # Data without ``cssRules`` is not treated as a stylesheet.
                if not hasattr(sitem.data, 'cssRules'):
                    self.logger.warn(
                    'Stylesheet %r referenced by file %r is not CSS'%(path,
                        item.href))
                    continue
                stylesheets.append(sitem.data)
        # Parse the caller-supplied CSS last; a parse failure is logged and
        # that sheet is ignored.
        csses = {'extra_css':extra_css, 'user_css':user_css}
        for w, x in csses.items():
            if x:
                try:
                    text = x
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    stylesheets.append(stylesheet)
                except:
                    self.logger.exception('Failed to parse %s, ignoring.'%w)
                    self.logger.debug('Bad css: ')
                    self.logger.debug(x)
        # Flatten every rule of every collected sheet into ``rules``;
        # ``index`` counts the rules in the order they are encountered.
        rules = []
        index = 0
        self.stylesheets = set()
        self.page_rule = {}
        for sheet_index, stylesheet in enumerate(stylesheets):
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                if rule.type == rule.MEDIA_RULE:
                    # Only @media blocks listing 'all', 'screen' or
                    # 'amzn-kf8' contribute rules.
                    media = {rule.media.item(i) for i in
                             xrange(rule.media.length)}
                    if not media.intersection({'all', 'screen', 'amzn-kf8'}):
                        continue
                    for subrule in rule.cssRules:
                        rules.extend(self.flatten_rule(subrule, href, index, is_user_agent_sheet=sheet_index==0))
                        index += 1
                else:
                    rules.extend(self.flatten_rule(rule, href, index, is_user_agent_sheet=sheet_index==0))
                    index = index + 1
        # NOTE(review): ordering depends on the tuples produced by
        # flatten_rule, which is not visible here - presumably specificity
        # first; confirm.
        rules.sort()
        self.rules = rules
        self._styles = {}
        # Matches one or two colons followed by any name listed in
        # INAPPROPRIATE_PSEUDO_CLASSES, case-insensitively.
        pseudo_pat = re.compile(ur':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
        select = Select(tree, ignore_inappropriate_pseudo_classes=True)

        # Apply each flattened rule to the elements its selector matches.
        for _, _, cssdict, text, _ in rules:
            fl = pseudo_pat.search(text)
            try:
                matches = tuple(select(text))
            except SelectorError as err:
                self.logger.error('Ignoring CSS rule with invalid selector: %r (%s)' % (text, as_unicode(err)))
                continue

            if fl is not None:
                fl = fl.group(1)
                if fl == 'first-letter' and getattr(self.oeb,
                        'plumber_output_format', '').lower() in {u'mobi', u'docx'}:
                    # Fake first-letter
                    from lxml.builder import ElementMaker
                    E = ElementMaker(namespace=XHTML_NS)
                    for elem in matches:
                        # Only the first descendant (or the element itself)
                        # that has text is rewritten; see the ``break`` below.
                        for x in elem.iter('*'):
                            if x.text:
                                punctuation_chars = []
                                # NOTE: reuses the name ``text`` (the selector
                                # text) for the element's text from here on.
                                text = unicode(x.text)
                                # Peel off leading characters whose Unicode
                                # category starts with 'P' or 'Z'.
                                while text:
                                    category = unicodedata.category(text[0])
                                    if category[0] not in {'P', 'Z'}:
                                        break
                                    punctuation_chars.append(text[0])
                                    text = text[1:]

                                special_text = u''.join(punctuation_chars) + \
                                        (text[0] if text else u'')
                                # Wrap the peeled prefix plus the next
                                # character in a marked <span> and keep the
                                # rest of the text as the span's tail.
                                span = E.span(special_text)
                                span.set('data-fake-first-letter', '1')
                                span.tail = text[1:]
                                x.text = None
                                x.insert(0, span)
                                self.style(span)._update_cssdict(cssdict)
                                break
                else:  # Element pseudo-class
                    for elem in matches:
                        self.style(elem)._update_pseudo_class(fl, cssdict)
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        # Apply ``style`` attributes after the stylesheet rules.
        for elem in xpath(tree, '//h:*[@style]'):
            self.style(elem)._apply_style_attr(url_replacer=item.abshref)
        # Values made only of digits and dots get a 'px' unit appended.
        num_pat = re.compile(r'[0-9.]+$')
        for elem in xpath(tree, '//h:img[@width or @height]'):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get('width', 'auto') != 'auto' or \
                    style._style.get('height', 'auto') != 'auto'
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ('width', 'height'):
                    val = elem.get(prop, '').strip()
                    # The attribute is removed from the element whether or
                    # not its value ends up being used.
                    try:
                        del elem.attrib[prop]
                    except:
                        pass
                    if val:
                        if num_pat.match(val) is not None:
                            val += 'px'
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)
Exemplo n.º 50
0
  var-theme-colour-1: #009EE0; 
  var-theme-colour-2: #FFED00; 
  var-theme-colour-3: #E2007A; 
  var-spacing: 24px;
}
        a {
            bottom: var(b, x);
            color: var(theme-colour-1, rgb(14,14,14));
            left: var(L, 1px);
            z-index: var(L, 1);
            top: var(T, calc( 2 * 1px ));
            background: var(U, url(example.png));
            border-color: var(C, #f00)
        }
    '''
    print(cssutils.parseString(css).cssText)

    sys.exit(1)

if 1:
    css = '''@media all and (width: 10px), all and (height:20px) {
        a {color:red}
    }
    '''
    css = '''@media (min-device-pixel-ratio: 1.3), (min-resolution: 1.3dppx){ 
        a {color:red}
    }'''
    css = '''@media not handheld/**/,/**/ all/**/and/**/ (/**/width: 10px) and (color), tv/**/{
        a {color:red}
    }'''
    css = '''@media tv,braille,tv {
Exemplo n.º 51
0
    def parse_page(self, url, processed_pages={}, index=None):
        # if this is the first page being parse, set it as the index.html
        if not index:
            index = url

        log.info(f"Parsing page '{url}'")
        log.debug(f"Using page config: {self.get_page_config(url)}")
        self.driver.get(url)

        try:
            WebDriverWait(self.driver, 60).until(notion_page_loaded())
        except TimeoutException as ex:
            log.critical(
                "Timeout waiting for page content to load, or no content found."
                " Are you sure the page is set to public?")
            return

        # scroll at the bottom of the notion-scroller element to load all elements
        # continue once there are no changes in height after a timeout
        # don't do this if the page has a calendar databse on it or it will load forever
        calendar = self.driver.find_elements_by_class_name(
            "notion-calendar-view")
        if not calendar:
            scroller = self.driver.find_element_by_css_selector(
                ".notion-frame > .notion-scroller")
            last_height = scroller.get_attribute("scrollHeight")
            log.debug(
                f"Scrolling to bottom of notion-scroller (height: {last_height})"
            )
            while True:
                self.driver.execute_script(
                    "arguments[0].scrollTo(0, arguments[0].scrollHeight)",
                    scroller)
                time.sleep(self.args["timeout"])
                new_height = scroller.get_attribute("scrollHeight")
                log.debug(
                    f"New notion-scroller height after timeout is: {new_height}"
                )
                if new_height == last_height:
                    break
                last_height = new_height

        # function to expand all the toggle block in the page to make their content visible
        # so we can hook up our custom toggle logic afterwards
        def open_toggle_blocks(timeout, exclude=[]):
            opened_toggles = exclude
            toggle_blocks = self.driver.find_elements_by_class_name(
                "notion-toggle-block")
            log.debug(
                f"Opening {len(toggle_blocks)} new toggle blocks in the page")
            for toggle_block in toggle_blocks:
                if not toggle_block in opened_toggles:
                    toggle_button = toggle_block.find_element_by_css_selector(
                        "div[role=button]")
                    # check if the toggle is already open by the direction of its arrow
                    is_toggled = "(180deg)" in (
                        toggle_button.find_element_by_tag_name(
                            "svg").get_attribute("style"))
                    if not is_toggled:
                        # click on it, then wait until all elements are displayed
                        toggle_button.click()
                        try:
                            WebDriverWait(self.driver, timeout).until(
                                toggle_block_has_opened(toggle_block))
                        except TimeoutException as ex:
                            log.warning(
                                "Timeout waiting for toggle block to open."
                                " Likely it's already open, but doesn't hurt to check."
                            )
                        except Exception as exception:
                            log.error(
                                f"Error trying to open a toggle block: {exception}"
                            )
                        opened_toggles.append(toggle_block)

            # after all toggles have been opened, check the page again to see if
            # any toggle block had nested toggle blocks inside them
            new_toggle_blocks = self.driver.find_elements_by_class_name(
                "notion-toggle-block")
            if len(new_toggle_blocks) > len(toggle_blocks):
                # if so, run the function again
                open_toggle_blocks(timeout, opened_toggles)

        # open the toggle blocks in the page
        open_toggle_blocks(self.args["timeout"])

        # creates soup from the page to start parsing
        soup = BeautifulSoup(self.driver.page_source, "html.parser")

        # remove scripts and other tags we don't want / need
        for unwanted in soup.findAll("script"):
            unwanted.decompose()
        for intercom_frame in soup.findAll("div", {"id": "intercom-frame"}):
            intercom_frame.decompose()
        for intercom_div in soup.findAll(
                "div", {"class": "intercom-lightweight-app"}):
            intercom_div.decompose()
        for overlay_div in soup.findAll("div",
                                        {"class": "notion-overlay-container"}):
            overlay_div.decompose()
        for vendors_css in soup.find_all("link",
                                         href=lambda x: x and "vendors~" in x):
            vendors_css.decompose()

        # clean up the default notion meta tags
        for tag in [
                "description",
                "twitter:card",
                "twitter:site",
                "twitter:title",
                "twitter:description",
                "twitter:image",
                "twitter:url",
                "apple-itunes-app",
        ]:
            unwanted_tag = soup.find("meta", attrs={"name": tag})
            if unwanted_tag:
                unwanted_tag.decompose()
        for tag in [
                "og:site_name",
                "og:type",
                "og:url",
                "og:title",
                "og:description",
                "og:image",
        ]:
            unwanted_og_tag = soup.find("meta", attrs={"property": tag})
            if unwanted_og_tag:
                unwanted_og_tag.decompose()

        # set custom meta tags
        custom_meta_tags = self.get_page_config(url).get("meta", [])
        for custom_meta_tag in custom_meta_tags:
            tag = soup.new_tag("meta")
            for attr, value in custom_meta_tag.items():
                tag.attrs[attr] = value
            log.debug(f"Adding meta tag {str(tag)}")
            soup.head.append(tag)

        # process images & emojis
        cache_images = True
        for img in soup.findAll("img"):
            if img.has_attr("src"):
                if cache_images and not "data:image" in img["src"]:
                    img_src = img["src"]
                    # if the path starts with /, it's one of notion's predefined images
                    if img["src"].startswith("/"):
                        img_src = "https://www.notion.so" + img["src"]
                        # notion's own default images urls are in a weird format, need to sanitize them
                        # img_src = 'https://www.notion.so' + img['src'].split("notion.so")[-1].replace("notion.so", "").split("?")[0]
                        # if (not '.amazonaws' in img_src):
                        # img_src = urllib.parse.unquote(img_src)

                    cached_image = self.cache_file(img_src)
                    img["src"] = cached_image
                else:
                    if img["src"].startswith("/"):
                        img["src"] = "https://www.notion.so" + img["src"]

            # on emoji images, cache their sprite sheet and re-set their background url
            if img.has_attr("class") and "notion-emoji" in img["class"]:
                style = cssutils.parseStyle(img["style"])
                spritesheet = style["background"]
                spritesheet_url = spritesheet[spritesheet.find("(") +
                                              1:spritesheet.find(")")]
                cached_spritesheet_url = self.cache_file(
                    "https://www.notion.so" + spritesheet_url)
                style["background"] = spritesheet.replace(
                    spritesheet_url, str(cached_spritesheet_url))
                img["style"] = style.cssText

        # process stylesheets
        for link in soup.findAll("link", rel="stylesheet"):
            if link.has_attr("href") and link["href"].startswith("/"):
                # we don't need the vendors stylesheet
                if "vendors~" in link["href"]:
                    continue
                # css_file = link['href'].strip("/")
                cached_css_file = self.cache_file("https://www.notion.so" +
                                                  link["href"])
                with open(self.dist_folder / cached_css_file, "rb") as f:
                    stylesheet = cssutils.parseString(f.read())
                    # open the stylesheet and check for any font-face rule,
                    for rule in stylesheet.cssRules:
                        if rule.type == cssutils.css.CSSRule.FONT_FACE_RULE:
                            # if any are found, download the font file
                            font_file = (rule.style["src"].split("url(/")
                                         [-1].split(") format")[0])
                            cached_font_file = self.cache_file(
                                f"https://www.notion.so/{font_file}")
                            rule.style["src"] = f"url({str(cached_font_file)})"
                link["href"] = str(cached_css_file)

        # add our custom logic to all toggle blocks
        for toggle_block in soup.findAll("div",
                                         {"class": "notion-toggle-block"}):
            toggle_id = uuid.uuid4()
            toggle_button = toggle_block.select_one("div[role=button]")
            toggle_content = toggle_block.find("div", {
                "class": None,
                "style": ""
            })
            if toggle_button and toggle_content:
                # add a custom class to the toggle button and content,
                # plus a custom attribute sharing a unique uiid so
                # we can hook them up with some custom js logic later
                toggle_button["class"] = toggle_block.get(
                    "class", []) + ["loconotion-toggle-button"]
                toggle_content["class"] = toggle_content.get(
                    "class", []) + ["loconotion-toggle-content"]
                toggle_content.attrs[
                    "loconotion-toggle-id"] = toggle_button.attrs[
                        "loconotion-toggle-id"] = toggle_id

        # if there are any table views in the page, add links to the title rows
        # the link to the row item is equal to its data-block-id without dashes
        for table_view in soup.findAll("div", {"class": "notion-table-view"}):
            for table_row in table_view.findAll(
                    "div", {"class": "notion-collection-item"}):
                table_row_block_id = table_row["data-block-id"]
                table_row_href = "/" + table_row_block_id.replace("-", "")
                row_target_span = table_row.find("span")
                row_link_wrapper = soup.new_tag("a",
                                                attrs={
                                                    "href": table_row_href,
                                                    "style": "cursor: pointer;"
                                                })
                row_target_span.wrap(row_link_wrapper)

        # embed custom google font(s)
        fonts_selectors = {
            "site": "div:not(.notion-code-block)",
            "navbar": ".notion-topbar div",
            "title":
            ".notion-page-block > div, .notion-collection_view_page-block > div[data-root]",
            "h1":
            ".notion-header-block div, notion-page-content > notion-collection_view-block > div:first-child div",
            "h2": ".notion-sub_header-block div",
            "h3": ".notion-sub_sub_header-block div",
            "body": ".notion-scroller",
            "code": ".notion-code-block *",
        }
        custom_fonts = self.get_page_config(url).get("fonts", {})
        if custom_fonts:
            # append a stylesheet importing the google font for each unique font
            unique_custom_fonts = set(custom_fonts.values())
            for font in unique_custom_fonts:
                if font:
                    google_fonts_embed_name = font.replace(" ", "+")
                    font_href = f"https://fonts.googleapis.com/css2?family={google_fonts_embed_name}:wght@500;600;700&display=swap"
                    custom_font_stylesheet = soup.new_tag("link",
                                                          rel="stylesheet",
                                                          href=font_href)
                    soup.head.append(custom_font_stylesheet)

            # go through each custom font, and add a css rule overriding the font-family
            # to the font override stylesheet targetting the appropriate selector
            font_override_stylesheet = soup.new_tag("style", type="text/css")
            for target, custom_font in custom_fonts.items():
                if custom_font and not target == "site":
                    log.debug(f"Setting {target} font-family to {custom_font}")
                    font_override_stylesheet.append(fonts_selectors[target] +
                                                    " {font-family:" +
                                                    custom_font +
                                                    " !important} ")
            site_font = custom_fonts.get("site", None)
            # process global site font last to more granular settings can override it
            if site_font:
                log.debug(f"Setting global site font-family to {site_font}"),
                font_override_stylesheet.append(fonts_selectors["site"] +
                                                " {font-family:" + site_font +
                                                "} ")
            # finally append the font overrides stylesheets to the page
            soup.head.append(font_override_stylesheet)

        # inject any custom elements to the page
        custom_injects = self.get_page_config(url).get("inject", {})

        def injects_custom_tags(section):
            """Build the custom tags configured for `section` and append them to it.

            `section` is used both as the key into `custom_injects` and as the
            element name looked up in `soup` (it is called with "head" and
            "body").  Each configured entry maps a tag name to a list of
            attribute dictionaries; one tag is created per dictionary.
            """
            configured_tags = custom_injects.get(section, {})
            for tag_name, attribute_sets in configured_tags.items():
                for attributes in attribute_sets:
                    new_tag = soup.new_tag(tag_name)
                    for attr_name, attr_value in attributes.items():
                        new_tag[attr_name] = attr_value
                        if attr_name.lower() not in ("href", "src"):
                            continue
                        # the attribute refers to a file: cache it and point
                        # the tag at the cached copy instead
                        log.debug(f"Copying injected file '{attr_value}'")
                        local_file = Path.cwd() / attr_value.strip("/")
                        cached_file = self.cache_file(local_file)
                        new_tag[attr_name] = str(cached_file)
                    log.debug(f"Injecting <{section}> tag: {str(new_tag)}")
                    soup.find(section).append(new_tag)

        injects_custom_tags("head")
        injects_custom_tags("body")

        # inject loconotion's custom stylesheet and script
        loconotion_custom_css = self.cache_file(Path("bundles/loconotion.css"))
        custom_css = soup.new_tag("link",
                                  rel="stylesheet",
                                  href=str(loconotion_custom_css))
        soup.head.insert(-1, custom_css)
        loconotion_custom_js = self.cache_file(Path("bundles/loconotion.js"))
        custom_script = soup.new_tag("script",
                                     type="text/javascript",
                                     src=str(loconotion_custom_js))
        soup.body.insert(-1, custom_script)

        # find sub-pages and clean slugs / links
        sub_pages = []
        for a in soup.findAll("a"):
            if a["href"].startswith("/"):
                sub_page_href = "https://www.notion.so" + a["href"]
                # if the link is an anchor link,
                # check if the page hasn't already been parsed
                if "#" in sub_page_href:
                    sub_page_href_tokens = sub_page_href.split("#")
                    sub_page_href = sub_page_href_tokens[0]
                    a["href"] = "#" + sub_page_href_tokens[-1]
                    a["class"] = a.get("class",
                                       []) + ["loconotion-anchor-link"]
                    if (sub_page_href in processed_pages.keys()
                            or sub_page_href in sub_pages):
                        log.debug(
                            f"Original page for anchor link {sub_page_href}"
                            " already parsed / pending parsing, skipping")
                        continue
                else:
                    a["href"] = (self.get_page_slug(sub_page_href)
                                 if sub_page_href != index else "index.html")
                sub_pages.append(sub_page_href)
                log.debug(f"Found link to page {a['href']}")

        # exports the parsed page
        html_str = str(soup)
        html_file = self.get_page_slug(url) if url != index else "index.html"
        if html_file in processed_pages.values():
            log.error(
                f"Found duplicate pages with slug '{html_file}' - previous one will be"
                " overwritten. Make sure that your notion pages names or custom slugs"
                " in the configuration files are unique")
        log.info(f"Exporting page '{url}' as '{html_file}'")
        with open(self.dist_folder / html_file, "wb") as f:
            f.write(html_str.encode("utf-8").strip())
        processed_pages[url] = html_file

        # parse sub-pages
        if sub_pages and not self.args.get("single_page", False):
            if processed_pages:
                log.debug(f"Pages processed so far: {len(processed_pages)}")
            for sub_page in sub_pages:
                if not sub_page in processed_pages.keys():
                    self.parse_page(sub_page,
                                    processed_pages=processed_pages,
                                    index=index)

        # we're all done!
        return processed_pages
Exemplo n.º 52
0
    def generateFontCSS(self):
        """Write ``profiles/_font.css`` with the embedded font family applied.

        Starts from the default GUI stylesheet, appends four ``@font-face``
        rules for a family named "para" (regular, italic, bold, bold italic)
        and sets ``font-family: "para"`` on the known text selectors.  A
        missing face falls back to a related one: italic and bold fall back
        to regular, bold italic falls back to italic.  The file is written
        next to the configuration file, in a ``profiles`` sub-directory that
        is created when absent.
        """
        # Selectors whose font-family is switched to the embedded font
        embed_selectors = [
            '.titleblock', '.text-author', 'p', 'p.title', '.cite', '.poem',
            '.table th', '.table td', '.annotation', 'body'
        ]

        css = cssutils.parseString(modules.default_css.gui_default_css)

        # Font files available for the configured family, keyed by face name
        faces = self.gui_config.fontDb.families[
            self.gui_config.embedFontFamily]

        font_regular = faces['Regular'] if 'Regular' in faces else ''
        font_italic = faces['Italic'] if 'Italic' in faces else font_regular
        font_bold = faces['Bold'] if 'Bold' in faces else font_regular
        font_bolditalic = (faces['Bold Italic']
                           if 'Bold Italic' in faces else font_italic)

        face_rules = (
            ('@font-face {{ font-family: "para"; src: url("fonts/{0}"); }}',
             font_regular),
            ('@font-face {{ font-family: "para"; src: url("fonts/{0}"); font-style: italic; }}',
             font_italic),
            ('@font-face {{ font-family: "para"; src: url("fonts/{0}"); font-weight: bold; }}',
             font_bold),
            ('@font-face {{ font-family: "para"; src: url("fonts/{0}"); font-style: italic; font-weight: bold; }}',
             font_bolditalic),
        )
        for template, font_file in face_rules:
            css.add(template.format(font_file))

        has_body_rule = False
        for rule in css:
            if rule.type != rule.STYLE_RULE:
                continue
            selector = rule.selectorText
            if selector in embed_selectors:
                rule.style['font-family'] = '"para"'
            if selector == 'body':
                has_body_rule = True

        # Add a body rule when the default stylesheet does not define one
        if not has_body_rule:
            css.add('body {font-family: "para"; line-height: 100%; }')

        css_dir = os.path.join(os.path.dirname(self.config_file), 'profiles')
        if not os.path.exists(css_dir):
            os.makedirs(css_dir)

        target = os.path.join(css_dir, '_font.css')
        with codecs.open(target, 'w', 'utf-8') as out:
            out.write(str(css.cssText, 'utf-8'))
Exemplo n.º 53
0
    def get_body(self, root_url, urls, visited_urls):
        """Download ``root_url`` to disk, then crawl every URL queued in ``urls``.

        Generator-based coroutine (it uses ``yield from``).

        :param root_url: URL to fetch; ``http://`` is prepended when the
            value does not start with "http".
        :param urls: shared work list of URLs still to fetch.  It is mutated
            in place: CSS ``url(...)`` references found in a downloaded
            ``.css`` file are inserted at the front, and entries are popped
            as they are processed.
        :param visited_urls: shared list of URLs already handled;
            ``root_url`` is appended to it.

        URLs carrying a fragment are recorded as visited but not downloaded.
        Pages are written below ``/opt/snare/pages/<domain>/``.  Download
        errors are printed and that URL is skipped.
        """
        if not root_url.startswith("http"):
            root_url = 'http://' + root_url
        visited_urls.append(root_url)
        parsed_url = urlparse(root_url)
        if parsed_url.fragment:
            return
        domain = parsed_url.netloc
        if not domain.endswith('/'):
            domain += '/'
        file_name = self.make_new_link(root_url)

        # Split "/dir/.../name.ext" into a directory part and a file name
        file_path = ''
        patt = r'/.*/.*\.'
        if re.match(patt, file_name):
            file_path, file_name = file_name.rsplit('/', 1)
            file_path += '/'
        print('path: ', file_path, 'name: ', file_name)
        if len(domain) < 4:
            sys.exit('invalid taget {}'.format(root_url))
        page_path = '/opt/snare/pages/{}'.format(domain)

        if not os.path.exists(page_path):
            os.mkdir(page_path)

        if file_path and not os.path.exists(page_path + file_path):
            os.makedirs(page_path + file_path)

        data = None
        try:
            with aiohttp.Timeout(10.0):
                with aiohttp.ClientSession() as session:
                    response = yield from session.get(root_url)
                    data = yield from response.read()
        except Exception as e:
            # best effort: report the failure and carry on with the queue
            print(e)
        else:
            response.release()
            session.close()
        if data is not None:
            # HTML/PHP pages get their links rewritten before being stored
            if re.match(r'.*\.(html|php)', file_name):
                soup = self.replace_links(data, domain, urls)
                data = str(soup).encode()
            with open(page_path + file_path + file_name, 'wb') as index_fh:
                index_fh.write(data)
            if '.css' in file_name:
                css = cssutils.parseString(data)
                for carved_url in cssutils.getUrls(css):
                    if carved_url.startswith('data'):
                        continue
                    carved_url = os.path.normpath(
                        os.path.join(domain, carved_url))
                    if not carved_url.startswith('http'):
                        if carved_url.startswith(
                                '..') or carved_url.startswith('/'):
                            carved_url = 'http://' + domain + carved_url
                        else:
                            carved_url = 'http://' + carved_url
                    if carved_url not in visited_urls:
                        urls.insert(0, carved_url)
        # Drain the shared queue.  Popping from the front (instead of removing
        # entries from the list while a ``for`` loop iterates over it) makes
        # sure no queued URL is skipped and left uncrawled.
        while urls:
            url = urls.pop(0)
            if url in visited_urls:
                continue
            yield from self.get_body(url, urls, visited_urls)
Exemplo n.º 54
0
    def extract_css_into_flows(self):
        """Move all CSS into ``self.flows`` and point documents at the flows.

        Every stylesheet in the manifest is appended to ``self.flows``;
        ``<link>`` elements and ``@import`` rules that reference one of them
        are rewritten to ``kindle:flow:<ref>?mime=text/css``.  Inline
        ``<style>`` elements in spine items are replaced by ``<link>``
        elements, with identical style text sharing a single flow.  At the
        end, every flow entry that has a ``cssText`` attribute is replaced
        by its text.
        """
        inlines = defaultdict(list)  # Ensure identical <style>s not repeated
        sheets = {}  # stylesheet href -> index of that sheet in self.flows
        passthrough = getattr(self.opts, 'mobi_passthrough', False)

        # Pass 1: register every manifest stylesheet as a flow
        for item in self.oeb.manifest:
            if item.media_type in OEB_STYLES:
                sheet = self.data(item)
                if not passthrough and not self.opts.expand_css and hasattr(item.data, 'cssText'):
                    condense_sheet(sheet)
                sheets[item.href] = len(self.flows)
                self.flows.append(sheet)

        def fix_import_rules(sheet):
            """Rewrite ``@import`` hrefs in `sheet` that target a known flow.

            Hrefs are resolved with ``item.abshref``, where ``item`` is read
            from the enclosing scope, i.e. whichever item the surrounding
            loop is currently processing.  Returns True if any rule changed.
            """
            changed = False
            for rule in sheet.cssRules.rulesOfType(CSSRule.IMPORT_RULE):
                if rule.href:
                    href = item.abshref(rule.href)
                    idx = sheets.get(href, None)
                    if idx is not None:
                        idx = to_ref(idx)
                        rule.href = 'kindle:flow:%s?mime=text/css'%idx
                        changed = True
            return changed

        # Pass 2: rewrite <link> hrefs and pull out inline <style> elements
        for item in self.oeb.spine:
            root = self.data(item)

            for link in XPath('//h:link[@href]')(root):
                href = item.abshref(link.get('href'))
                idx = sheets.get(href, None)
                if idx is not None:
                    idx = to_ref(idx)
                    link.set('href', 'kindle:flow:%s?mime=text/css'%idx)

            for tag in XPath('//h:style')(root):
                p = tag.getparent()
                idx = p.index(tag)
                raw = tag.text
                # empty <style> elements are simply removed
                if not raw or not raw.strip():
                    extract(tag)
                    continue
                sheet = cssutils.parseString(raw, validate=False)
                # only re-serialize the CSS when an @import was rewritten
                if fix_import_rules(sheet):
                    raw = force_unicode(sheet.cssText, 'utf-8')

                # put a <link> where the <style> was; its href is set below
                # once the flow index for this CSS text is known
                repl = etree.Element(XHTML('link'), type='text/css',
                        rel='stylesheet')
                repl.tail='\n'
                p.insert(idx, repl)
                extract(tag)
                inlines[raw].append(repl)

        # Pass 3: one flow per distinct inline CSS text, shared by its links
        for raw, elems in inlines.items():
            idx = to_ref(len(self.flows))
            self.flows.append(raw)
            for link in elems:
                link.set('href', 'kindle:flow:%s?mime=text/css'%idx)

        # Pass 4: fix @import rules inside the manifest stylesheets themselves
        for item in self.oeb.manifest:
            if item.media_type in OEB_STYLES:
                sheet = self.data(item)
                if hasattr(sheet, 'cssRules'):
                    fix_import_rules(sheet)

        # Pass 5: serialize parsed sheets; iterate over a snapshot because
        # self.flows is modified in place
        for i, sheet in enumerate(tuple(self.flows)):
            if hasattr(sheet, 'cssText'):
                self.flows[i] = force_unicode(sheet.cssText, 'utf-8')
Exemplo n.º 55
0
    def test_useMinified(self):
        "Preferences.useMinified()"
        prefs = cssutils.ser.prefs
        prefs.useDefaults()
        prefs.useMinified()

        # every serializer preference and the value useMinified() must set
        expected_prefs = (
            ('defaultAtKeyword', True),
            ('defaultPropertyName', True),
            ('importHrefFormat', 'string'),
            ('indent', u''),
            ('keepAllProperties', True),
            ('keepComments', False),
            ('keepEmptyRules', False),
            ('keepUnkownAtRules', False),
            ('keepUsedNamespaceRulesOnly', True),
            ('lineNumbers', False),
            ('lineSeparator', u''),
            ('listItemSpacer', u''),
            ('omitLastSemicolon', True),
            ('paranthesisSpacer', u''),
            ('propertyNameSpacer', u''),
            ('selectorCombinatorSpacer', u''),
            ('spacer', u''),
            ('validOnly', False),
        )
        for pref_name, expected in expected_prefs:
            self.assertEqual(getattr(prefs, pref_name), expected)

        source = u'''
    /*1*/
    @import   url(x) tv , print;
    @namespace   prefix "uri";
    @namespace   unused "unused";
    @media  all {}
    @media  all {
        a {}
    }
    @media all "name" {
        a { color: red; }
    }
    @page:left {
    left: 0
    }
    a {}
    prefix|x, a + b > c ~ d , b { top : 1px ; 
        font-family : arial ,  'some' 
        }
    @x  x;
    '''
        sheet = cssutils.parseString(source)

        # unknown at-rules are serialized only while the preference is on
        prefs.keepUnkownAtRules = True
        self.assertEqual(
            sheet.cssText,
            u'''@import"x"tv,print;@namespace prefix"uri";@media all"name"{a{color:red}}@page :left{left:0}prefix|x,a+b>c~d,b{top:1px;font-family:arial,"some"}@x x;'''
        )
        prefs.keepUnkownAtRules = False
        self.assertEqual(
            sheet.cssText,
            u'''@import"x"tv,print;@namespace prefix"uri";@media all"name"{a{color:red}}@page :left{left:0}prefix|x,a+b>c~d,b{top:1px;font-family:arial,"some"}'''
        )

        # CSSValues: raw value text -> expected minified serialization
        value_cases = {
            u'  a  a1  a-1  a-1a  ':
            'a a1 a-1 a-1a',
            u'a b 1 c 1em d -1em e':
            u'a b 1 c 1em d -1em e',
            u'  1em  /  5  ':
            u'1em/5',
            u'1em/5':
            u'1em/5',
            u'a 0 a .0 a 0.0 a -0 a -.0 a -0.0 a +0 a +.0 a +0.0':
            u'a 0 a 0 a 0 a 0 a 0 a 0 a 0 a 0 a 0',
            u'a  0px  a  .0px  a  0.0px  a  -0px  a  -.0px  a  -0.0px  a  +0px  a  +.0px  a  +0.0px ':
            u'a 0 a 0 a 0 a 0 a 0 a 0 a 0 a 0 a 0',
            u'a  1  a  .1  a  1.0  a  0.1  a  -1  a  -.1  a  -1.0  a  -0.1  a  +1  a  +.1  a  +1.0':
            u'a 1 a 0.1 a 1 a 0.1 a -1 a -0.1 a -1 a -0.1 a 1 a 0.1 a 1',
            u'  url(x)  f()':
            'url(x) f()',
            u'#112233':
            '#123',
            u'#112234':
            '#112234',
            u'#123':
            '#123',
            u'#123 url() f()':
            '#123 url() f()',
            u'1 +2 +3 -4':
            u'1 2 3 -4'  # ?
        }
        for raw_value, minified_value in value_cases.items():
            parsed = cssutils.parseString(u'a{x:%s}' % raw_value)
            self.assertEqual(u'a{x:%s}' % minified_value, parsed.cssText)
Exemplo n.º 56
0
def getSoupView(soup, css, url=''):
    """
    Compute the cascaded style of every element of `soup` matched by `css`.

    soup
        a BeautifulSoup 4 object
    css
        a CSS StyleSheet string
    url
        base URL used to resolve relative URLs found in the sheet

    returns style view
        a dict mapping ``id(element)`` to a tuple
        ``(element, CSSStyleDeclaration)``

    Style rules of directly imported sheets are applied before the rules of
    the sheet itself.  Selectors containing ``:`` (pseudo classes/elements)
    are ignored.  For each property the winning declaration is the one with
    higher priority or, at equal priority, with equal or higher selector
    specificity (so later rules win ties).
    """
    import itertools

    sheet = cssutils.parseString(css, href=url)

    cssutils.replaceUrls(sheet,
                         lambda u: urlparse.urljoin(url, u),
                         ignoreImportRules=True)
    view = {}
    specificities = {}  # per element: property name -> winning specificity

    # TODO: filter rules simpler?, add @media
    rule_sources = []
    for i_rule in sheet:
        if i_rule.type == i_rule.IMPORT_RULE:
            # bind the href now so the replacer never depends on the loop
            # variable's later values
            cssutils.replaceUrls(
                i_rule.styleSheet,
                lambda u, base=i_rule.href: urlparse.urljoin(base, u),
                ignoreImportRules=True)
            rule_sources.append(rule for rule in i_rule.styleSheet
                                if rule.type == rule.STYLE_RULE)
    # the sheet's own rules come after all imported rules
    rule_sources.append(rule for rule in sheet
                        if rule.type == rule.STYLE_RULE)

    for rule in itertools.chain.from_iterable(rule_sources):
        for selector in rule.selectorList:
            # TODO: make this a callback to be able to use other stuff than lxml
            if ':' in selector.selectorText:
                # Ignore pseudo:classes because we can't use them, plus they
                # match when we don't want them to on bs4
                continue
            for element in soup.select(selector.selectorText):
                key = id(element)
                if key not in view:
                    # add initial empty style declaration
                    view[key] = (element, cssutils.css.CSSStyleDeclaration())
                    specificities[key] = {}
                declaration = view[key][1]
                winners = specificities[key]

                for p in rule.style:
                    if p not in declaration:
                        # setProperty needs a new Property object and
                        # MUST NOT reuse the existing Property
                        # which would be the same for all elements!
                        # see Issue #23
                        declaration.setProperty(p.name, p.value, p.priority)
                        winners[p.name] = selector.specificity
                        continue

                    sameprio = (
                        p.priority == declaration.getPropertyPriority(p.name))
                    overrides = (not sameprio and bool(p.priority)) or (
                        sameprio and selector.specificity >= winners[p.name])
                    if overrides:
                        # later, more specific or higher prio
                        declaration.setProperty(p.name, p.value, p.priority)
                        # remember the specificity of the new winner;
                        # otherwise later rules would be compared against
                        # the first declaration seen instead of the one
                        # currently in effect
                        winners[p.name] = selector.specificity

    return view
Exemplo n.º 57
0
    def _parse_css_string(self, css_body, validate=True):
        """Parse `css_body` into a stylesheet.

        Goes through ``_cache_parse_css_string`` when ``self.cache_css_parsing``
        is set, otherwise calls ``cssutils.parseString`` directly; `validate`
        is forwarded unchanged in both cases.
        """
        parse = (_cache_parse_css_string
                 if self.cache_css_parsing else cssutils.parseString)
        return parse(css_body, validate=validate)
Exemplo n.º 58
0
    def test_styleSheet(self):
        "CSSImportRule.styleSheet"

        def text_fetcher(url):
            # the level 1 sheet imports a level 2 sheet; anything else is a
            # plain style sheet
            if url != "/root/level1/anything.css":
                return None, 'a { color: red }'
            return None, '@import "level2/css.css" "title2";'

        root = cssutils.CSSParser(fetcher=text_fetcher).parseString(
            '''@charset "ascii";
                                   @import "level1/anything.css" tv "title";''',
            href='/root/')

        self.assertEqual(root.href, '/root/')

        # first level import
        import_rule = root.cssRules[1]
        imported = import_rule.styleSheet
        self.assertEqual(import_rule.href, 'level1/anything.css')
        self.assertEqual(imported.href, '/root/level1/anything.css')
        # inherits ascii as no self charset is set
        self.assertEqual(imported.encoding, 'ascii')
        self.assertEqual(imported.ownerRule, import_rule)
        self.assertEqual(imported.media.mediaText, 'tv')
        self.assertEqual(imported.parentStyleSheet, None)  # sheet
        self.assertEqual(imported.title, 'title')
        self.assertEqual(
            imported.cssText,
            '@charset "ascii";\n@import "level2/css.css" "title2";'.encode())

        # second level import, reached through the first imported sheet
        nested_rule = imported.cssRules[1]
        nested = nested_rule.styleSheet
        self.assertEqual(nested_rule.href, 'level2/css.css')
        self.assertEqual(nested.href, '/root/level1/level2/css.css')
        # inherits ascii as no self charset is set
        self.assertEqual(nested.encoding, 'ascii')
        self.assertEqual(nested.ownerRule, nested_rule)
        self.assertEqual(nested.media.mediaText, 'all')
        self.assertEqual(nested.parentStyleSheet, None)  #ir.styleSheet
        self.assertEqual(nested.title, 'title2')
        self.assertEqual(
            nested.cssText,
            '@charset "ascii";\na {\n    color: red\n    }'.encode())

        # an import that cannot be fetched still exposes a style sheet object
        unresolved = cssutils.parseString('@import "CANNOT-FIND.css";')
        missing_rule = unresolved.cssRules[0]
        self.assertEqual(missing_rule.href, "CANNOT-FIND.css")
        self.assertEqual(type(missing_rule.styleSheet),
                         cssutils.css.CSSStyleSheet)

        def bytes_fetcher(url):
            # same chain as above, but the fetcher hands back encoded bytes
            if url.endswith('level1.css'):
                return None, '@charset "ascii"; @import "level2.css";'.encode()
            return None, 'a { color: red }'.encode()

        current = cssutils.CSSParser(fetcher=bytes_fetcher).parseString(
            '@charset "iso-8859-1";@import "level1.css";')
        self.assertEqual(current.encoding, 'iso-8859-1')

        current = current.cssRules[1].styleSheet
        self.assertEqual(current.encoding, 'ascii')

        current = current.cssRules[1].styleSheet
        self.assertEqual(current.encoding, 'ascii')
def _tag_text(tag, default='None'):
    """Return ``tag.text``, or `default` when the lookup found nothing."""
    return default if tag is None else tag.text


def _last_child_text(tags, child_name):
    """Return the stripped text of the first direct `child_name` child of the
    last tag in `tags`, or the string 'None' when it does not exist."""
    try:
        children = tags[-1].findChildren(child_name, recursive=False)
        return children[0].text.strip()
    except (IndexError, AttributeError):
        return 'None'


def _stripped_texts(tags):
    """Return the stripped text of every tag in `tags`."""
    return [tag.text.strip() for tag in tags]


def _build_phone_map(css_text):
    """Map each ``...before...`` selector in `css_text` to its decoded value.

    The value is derived from the second character of the declaration value
    (code point minus 643073, with 15 remapped to 9).
    NOTE(review): presumably these are icon-font glyphs standing for phone
    digits - confirm against the scraped page.
    """
    phone_map = {}
    for rule in cssutils.parseString(css_text):
        selector = getattr(rule, 'selectorText', None)
        if selector is None or 'before' not in str(selector):
            continue
        digit = None
        try:
            for declaration in rule.style:
                digit = ord(declaration.value.strip()[1]) - 643073
                if digit == 15:
                    digit = 9
        except (AttributeError, IndexError, TypeError):
            print('fail')
            continue
        # a rule without declarations carries no value; skip it instead of
        # reusing whatever an earlier rule produced
        if digit is not None:
            phone_map[selector] = digit
    return phone_map


def _is_complete_number(number, length):
    """Tell whether `number` (built from `length` symbols) is complete:
    11 symbols starting with "011", or 13 symbols."""
    return length == 13 or (length == 11 and number.startswith('011'))


def _decode_phone_numbers(spans, phone_map):
    """Rebuild the distinct phone numbers encoded by `spans`, in page order.

    Each span contributes the symbol mapped (in `phone_map`) to a selector
    containing the span's second CSS class; the value 16 stands for "+".
    """
    numbers = []
    seen = set()
    current = ''
    length = 0
    for span in spans:
        if _is_complete_number(current, length):
            if current not in seen:
                numbers.append(current)
            seen.add(current)
            current = ''
            length = 0
        try:
            icon_class = str(span['class'][1])
        except (KeyError, IndexError):
            # span without a second class: it encodes no symbol
            continue
        for selector, value in phone_map.items():
            if icon_class in selector:
                if value == 16:
                    current += '+'
                    length += 1
                    continue
                current += str(value)
                length += 1
                break
    # flush the number that was still being built when the spans ran out
    if _is_complete_number(current, length) and current not in seen:
        numbers.append(current)
    return numbers


def _parse_review(block):
    """Return a dict with the fields found in one review `block`
    (possibly empty)."""
    review = {}
    name = block.find('span', attrs={'class': 'rName'})
    user_rating = block.find('span', attrs={'class': 'star_m'})
    user_rating_date = block.find('span', attrs={'class': 'dtyr'})
    user_review = block.find('p', attrs={'class': 'rwopinion2'})

    if name:
        review['user_name'] = name.text
    if user_rating:
        try:
            # the rating is the last character of the aria-label
            review['user_rating'] = user_rating['aria-label'][-1]
        except (KeyError, IndexError):
            review['user_rating'] = '0'
    if user_rating_date:
        try:
            review['user_rating_date'] = user_rating_date['content']
        except KeyError:
            review['user_rating_date'] = "None"
    if user_review:
        review['user_review'] = user_review.text
    return review


def getdata(html):
    """Scrape one business page and store its fields in the module-level
    ``data`` dict.

    :param html: markup of the page, parsed with ``html.parser``.

    Keys written: ``phone_number``, ``JustDail_business_title``,
    ``business_name``, ``total_rating``, ``total_rating_count``,
    ``long_addr``, ``website``, ``Year_Established``, ``Modes_of_payment``,
    ``category``, ``Also_Listed_in`` and ``reviews``.  A field whose element
    is missing is stored as the string 'None'; a field whose text or list is
    empty is not written at all.  Nothing is returned.

    Every lookup is independent, so one missing element no longer prevents
    the remaining fields from being extracted.
    """
    soup = BeautifulSoup(html, 'html.parser')

    title = soup.find('title')
    name_tag = soup.find('span', attrs={'class': 'fn'})
    rating_tag = soup.find('span', attrs={'class': 'value-titles'})
    review_ele = soup.findAll(class_='allratingM')
    total_rating_count = soup.find('span', attrs={'class': 'votes'})
    address_box = soup.find('span', attrs={'id': 'fulladdress'})
    address_tag = None
    if address_box is not None:
        address_tag = address_box.find('span', attrs={'class': 'lng_add'})
    category = soup.findAll(class_='lng_als_lst')
    pay_modes = soup.findAll(class_='lng_mdpay')
    also_listed = soup.findAll(class_='lng_als_lst')
    year_lists = soup.findAll('ul', attrs={'class': 'alstdul'})
    web = soup.findAll('span', attrs={'class': 'mreinfp'})
    # the last text/css <style> block maps phone symbols to icon classes
    style = soup.findAll('style', attrs={'type': 'text/css'})

    if (name_tag is None or rating_tag is None or address_tag is None
            or not style):
        print("Error : Page fromat changed")

    business_name = _tag_text(name_tag)
    rating = _tag_text(rating_tag)
    long_addr = _tag_text(address_tag)

    phone_map = _build_phone_map(str(style[-1].text)) if style else {}
    phone_img = soup.findAll('span', attrs={'class': 'mobilesv'})
    data['phone_number'] = _decode_phone_numbers(phone_img, phone_map)

    if title is not None:
        data['JustDail_business_title'] = title.text.strip()
    else:
        data['JustDail_business_title'] = 'None'
    data['business_name'] = business_name

    website = _last_child_text(web, 'a')
    year_established = _last_child_text(year_lists, 'li')

    if rating:
        data['total_rating'] = rating
    data['total_rating_count'] = _tag_text(total_rating_count)
    if long_addr:
        data['long_addr'] = long_addr
    if website:
        data['website'] = website
    if year_established:
        data['Year_Established'] = year_established

    pay_string = _stripped_texts(pay_modes)
    if pay_string:
        data['Modes_of_payment'] = pay_string

    cat_string = _stripped_texts(category)
    if cat_string:
        data['category'] = cat_string

    also_listed_string = _stripped_texts(also_listed)
    if also_listed_string:
        data['Also_Listed_in'] = also_listed_string

    # user reviews; blocks that yield no field at all are dropped
    reviews = []
    for block in review_ele:
        review = _parse_review(block)
        if review:
            reviews.append(review)
    data['reviews'] = reviews
Exemplo n.º 60
0
    def test_resolveImports(self):
        "cssutils.resolveImports(sheet)"
        if mock:
            self._tempSer()
            cssutils.ser.prefs.useMinified()

            a = u'@charset "iso-8859-1";@import"b.css";\xe4{color:green}'.encode(
                'iso-8859-1')
            b = u'@charset "ascii";\\E4 {color:red}'.encode('ascii')

            # normal
            m = mock.Mock()
            with mock.patch('cssutils.util._defaultFetcher', m):
                m.return_value = (None, b)
                s = cssutils.parseString(a)

                # py3 TODO
                self.assertEqual(a, s.cssText)
                self.assertEqual(b, s.cssRules[1].styleSheet.cssText)

                c = cssutils.resolveImports(s)

                # py3 TODO
                self.assertEqual(
                    u'\xc3\xa4{color:red}\xc3\xa4{color:green}'.encode(
                        'iso-8859-1'), c.cssText)

                c.encoding = 'ascii'
                self.assertEqual(
                    ur'@charset "ascii";\E4 {color:red}\E4 {color:green}'.
                    encode(), c.cssText)

            # b cannot be found
            m = mock.Mock()
            with mock.patch('cssutils.util._defaultFetcher', m):
                m.return_value = (None, None)
                s = cssutils.parseString(a)

                # py3 TODO
                self.assertEqual(a, s.cssText)
                self.assertEqual(cssutils.css.CSSStyleSheet,
                                 type(s.cssRules[1].styleSheet))
                c = cssutils.resolveImports(s)
                # py3 TODO
                self.assertEqual(
                    u'@import"b.css";\xc3\xa4{color:green}'.encode(
                        'iso-8859-1'), c.cssText)

            # @import with media
            a = u'@import"b.css";@import"b.css" print, tv ;@import"b.css" all;'
            b = u'a {color: red}'
            m = mock.Mock()
            with mock.patch('cssutils.util._defaultFetcher', m):
                m.return_value = (None, b)
                s = cssutils.parseString(a)

                c = cssutils.resolveImports(s)

                self.assertEqual(
                    'a{color:red}@media print,tv{a{color:red}}a{color:red}'.
                    encode(), c.cssText)

            # cannot resolve with media => keep original
            a = u'@import"b.css"print;'
            b = u'@namespace "http://example.com";'
            m = mock.Mock()
            with mock.patch('cssutils.util._defaultFetcher', m):
                m.return_value = (None, b)
                s = cssutils.parseString(a)
                c = cssutils.resolveImports(s)
                self.assertEqual(a.encode(), c.cssText)

            # urls are adjusted too, layout:
            # a.css
            # c.css
            # img/img.gif
            # b/
            #     b.css
            #     subimg/subimg.gif
            a = u'''
                 @import"b/b.css";
                 a {
                     x: url(/img/abs.gif);
                     y: url(img/img.gif);
                     z: url(b/subimg/subimg.gif);
                     }'''

            def fetcher(url):
                c = {
                    'b.css':
                    u'''
                         @import"../c.css";
                         b {
                             x: url(/img/abs.gif);
                             y: url(../img/img.gif);
                             z: url(subimg/subimg.gif);
                             }''',
                    'c.css':
                    u'''
                         c {
                             x: url(/img/abs.gif);
                             y: url(./img/img.gif);
                             z: url(./b/subimg/subimg.gif);
                             }'''
                }
                return 'utf-8', c[os.path.split(url)[1]]

            @mock.patch.object(cssutils.util, '_defaultFetcher', new=fetcher)
            def do():
                s = cssutils.parseString(a)
                r = cssutils.resolveImports(s)
                return s, r

            s, r = do()

            cssutils.ser.prefs.useDefaults()
            cssutils.ser.prefs.keepComments = False
            self.assertEqual(
                u'''c {
    x: url(/img/abs.gif);
    y: url(img/img.gif);
    z: url(b/subimg/subimg.gif)
    }
b {
    x: url(/img/abs.gif);
    y: url(img/img.gif);
    z: url(b/subimg/subimg.gif)
    }
a {
    x: url(/img/abs.gif);
    y: url(img/img.gif);
    z: url(b/subimg/subimg.gif)
    }'''.encode(), r.cssText)

            cssutils.ser.prefs.useDefaults()
        else:
            self.assertEqual(False, u'Mock needed for this test')