Пример #1
0
 def _decode_zip_filename(self, name):
     """Decode a zip member name by trying a fixed list of codecs in order.

     Returns the result of the first ``to_unicode`` call that does not raise
     ``UnicodeDecodeError``; if every codec fails, ``name`` is returned
     unchanged.
     """
     # NOTE(review): cp866 assigns a character to every byte value, so if
     # to_unicode performs a strict decode the later codecs may never be
     # reached -- confirm the intended order.
     candidates = ('cp866', 'cp1251', 'utf-8')
     for codec in candidates:
         try:
             decoded = to_unicode(name, codec)
         except UnicodeDecodeError:
             continue
         else:
             return decoded
     return name
Пример #2
0
    def get_source(self, name):
        """Return ``(data, name)`` for the template called ``name``.

        ``name`` is first joined onto ``self.base_path`` when one is set and,
        if it is a ``str``, passed through ``to_unicode``. The lookup order is
        ``self.mapping`` first, then the archive itself via the original
        member name recorded in ``self._filenames``.

        Raises ``FileNotFound`` when the name is in neither place.
        """
        logging.debug('ZipLoader.get_source %s', name)

        if self.base_path:
            name = path.join(self.base_path, name)
            logging.debug('ZipLoader.get_source has base_path, result name is %s', name)

        self._unpack_zip()

        name = to_unicode(name, 'utf-8') if isinstance(name, str) else name

        # Entries already present in the mapping win over the archive.
        cached = self.mapping.get(name, None)
        if cached is not None:
            return cached, name

        archive_name = self._filenames.get(name)
        logging.debug('ZipLoader.get_source original_name=%s', archive_name)
        if archive_name is None:
            raise FileNotFound(name)

        payload = self.zipfile.read(archive_name)
        logging.debug('ZipLoader.get_source returns %s bytes', len(payload))
        return payload, name
Пример #3
0
 def encode_header(self, value):
     """Return ``value`` encoded as a mail header string.

     ``value`` is converted with ``to_unicode`` using ``self.charset``. A
     string result is right-stripped, wrapped in ``Header`` and returned as
     ``str``; any other result is returned as-is.
     """
     value = to_unicode(value, charset=self.charset)
     if not isinstance(value, string_types):
         return value
     header = Header(value.rstrip(), self.charset)
     return str(header)
Пример #4
0
def test_after_build():
    """Check that an ``after_build`` hook can add a header to the built message."""
    header_name = 'X-After-Build'

    def mark_built(original_message, built_message):
        # The hook receives the built message and tags it with the header.
        built_message[header_name] = '1'

    message = emails.Message(**common_email_data())
    message.after_build = mark_built

    rendered = message.as_string()
    print("type of message.as_string() is {0}".format(type(rendered)))
    assert header_name in to_unicode(rendered, 'utf-8')
Пример #5
0
def test_after_build():
    """Verify the ``after_build`` callback's header shows up in ``as_string()``."""
    marker = 'X-After-Build'

    def add_marker(original_message, built_message):
        built_message[marker] = '1'

    email_kwargs = common_email_data()
    msg = emails.Message(**email_kwargs)
    msg.after_build = add_marker

    serialized = msg.as_string()
    print("type of message.as_string() is {0}".format(type(serialized)))
    serialized_text = to_unicode(serialized, 'utf-8')
    assert marker in serialized_text
Пример #6
0
def parse_name_and_email(obj, encoding='utf-8'):
    """Split ``obj`` into a ``(realname, email)`` pair.

    ``obj`` may be an address string (bare address or ``"Name" <address>``
    form, parsed with ``parseaddr``) or a two-item list/tuple of
    ``(realname, email)``. A list/tuple of any other length yields empty
    values. ``bytes`` parts are converted with ``to_unicode`` using
    ``encoding``. Empty parts are returned as ``None``.

    Raises ``ValueError`` for any other kind of ``obj``.
    """
    if isinstance(obj, (list, tuple)):
        # Only exact pairs are unpacked; other lengths fall back to blanks.
        realname, address = obj if len(obj) == 2 else ('', '')
    elif isinstance(obj, string_types):
        realname, address = parseaddr(obj)
    else:
        raise ValueError("Can not parse_name_and_email from %s" % obj)

    def _as_text(part):
        # Leave non-bytes values untouched.
        return to_unicode(part, encoding) if isinstance(part, bytes) else part

    realname = _as_text(realname)
    address = _as_text(address)

    return realname or None, address or None
Пример #7
0
    def start_load_file(self, html, encoding="utf-8"):
        """
        Store HTML given as text, bytes or a file-like object as the start page.

        Objects with a ``read`` attribute are read first. Non-text content is
        converted with ``to_unicode`` using ``encoding``. The URL and header
        attributes are reset to ``None``.
        """
        content = html.read() if hasattr(html, 'read') else html

        if not isinstance(content, text_type):
            content = to_unicode(content, encoding)

        # Collapse CRLF to LF, otherwise the output ends up full of &#13;
        self.html_content = content.replace('\r\n', '\n')
        self.html_encoding = encoding
        self.start_url = self.base_url = self.headers = None
Пример #8
0
    def patch_message(self, message):
        """
        Adjust ``message`` in place for this test SMTP account and return it.

        Some SMTP requires from and to emails, so the sender address and the
        recipients are overridden when ``self.from_email`` / ``self.to_email``
        are set. The subject is prefixed with ``self.subject_prefix`` and
        suffixed with the running Python ``major.minor`` version unless it
        already starts with the prefix. ``X-Test-Date`` and
        ``X-Python-Version`` headers are always set.
        """

        if self.from_email:
            # Keep the display name, replace only the address part.
            message.mail_from = (message.mail_from[0], self.from_email)

        if self.to_email:
            message.mail_to = self.to_email

        # TODO: this code breaks template in subject; fix it
        if not to_unicode(message.subject).startswith(self.subject_prefix):
            # version_info rather than sys.version[:3]: the slice truncates
            # two-digit minor versions ("3.10" would become "3.1").
            python_tag = 'py%d.%d' % sys.version_info[:2]
            message.subject = " ".join([self.subject_prefix, message.subject,
                                        python_tag])

        message._headers['X-Test-Date'] = datetime.datetime.utcnow().isoformat()
        message._headers['X-Python-Version'] = "%s/%s" % (platform.python_version(), platform.platform())

        return message
Пример #9
0
    def patch_message(self, message):
        """
        Adjust ``message`` in place for this test SMTP account and return it.

        Some SMTP requires from and to emails, so the sender address and the
        recipients are overridden when ``self.from_email`` / ``self.to_email``
        are set. The subject is prefixed with ``self.subject_prefix`` and
        suffixed with the running Python ``major.minor`` version unless it
        already starts with the prefix. ``X-Test-Date`` and
        ``X-Python-Version`` headers are always set.
        """

        if self.from_email:
            # Keep the display name, replace only the address part.
            message.mail_from = (message.mail_from[0], self.from_email)

        if self.to_email:
            message.mail_to = self.to_email

        # TODO: this code breaks template in subject; fix it
        if not to_unicode(message.subject).startswith(self.subject_prefix):
            # version_info rather than sys.version[:3]: the slice truncates
            # two-digit minor versions ("3.10" would become "3.1").
            python_tag = 'py%d.%d' % sys.version_info[:2]
            message.subject = " ".join([self.subject_prefix, message.subject,
                                        python_tag])

        message._headers['X-Test-Date'] = datetime.datetime.utcnow().isoformat()
        message._headers['X-Python-Version'] = "%s/%s" % (platform.python_version(), platform.platform())

        return message
Пример #10
0
def sanitize_address(addr, encoding):
    """Return ``addr`` formatted as a header-safe ``Name <address>`` string.

    ``addr`` is either an address string (parsed with ``parseaddr``) or an
    already split ``(name, address)`` pair. The display name is encoded with
    ``Header`` using ``encoding``, falling back to utf-8. A non-ASCII address
    gets its local part header-encoded and its domain IDNA-encoded.
    """
    if isinstance(addr, string_types):
        addr = parseaddr(to_unicode(addr))
    display_name, mailbox = addr

    # This try-except clause is needed on Python 3 < 3.2.4
    # http://bugs.python.org/issue14291
    try:
        display_name = Header(display_name, encoding).encode()
    except UnicodeEncodeError:
        display_name = Header(display_name, 'utf-8').encode()

    try:
        mailbox.encode('ascii')
    except UnicodeEncodeError:  # IDN
        if '@' not in mailbox:
            mailbox = Header(mailbox, encoding).encode()
        else:
            local, _, host = mailbox.partition('@')
            local = str(Header(local, encoding))
            host = host.encode('idna').decode('ascii')
            mailbox = '@'.join((local, host))

    return formataddr((display_name, mailbox))
Пример #11
0
def guess_charset(headers, html):
    """Guess the character set of an HTML document.

    Three sources are tried in order, and the first hit is returned:

    1. the ``charset`` parameter of the ``content-type`` entry in ``headers``
       (skipped when ``headers`` is falsy or has no such entry);
    2. a charset found in ``html`` via ``RE_META`` / ``RE_INSIDE_META`` /
       ``RE_CHARSET``, converted with ``to_unicode``;
    3. the ``'encoding'`` value reported by ``chardet.detect(html)``.
    """

    # guess by http headers
    if headers:
        # .get() instead of indexing: a dict-like headers object without a
        # content-type entry must fall through to the other strategies
        # rather than raise KeyError.
        content_type = headers.get('content-type')
        if content_type:
            _, params = cgi.parse_header(content_type)
            charset = params.get('charset', None)
            if charset:
                return charset

    # guess by html meta
    for meta in RE_META.findall(html):
        for inner in RE_INSIDE_META.findall(meta):
            for charset in RE_CHARSET.findall(inner):
                return to_unicode(charset)

    # guess by chardet
    return chardet.detect(html)['encoding']
Пример #12
0
 def log(self, level, *msg):
     """Print ``msg`` indented by ``level``; does nothing unless ``self.DEBUG``."""
     if not self.DEBUG:
         return
     indent = level * '\t '
     # Falsy parts are rendered as empty strings.
     text = ' '.join(to_unicode(part or '') for part in msg)
     print('%s- %s' % (indent, text))
Пример #13
0
def normalize_html(s):
    """Return ``s`` converted with ``to_unicode`` and with all whitespace removed."""
    text = to_unicode(s)
    chunks = text.split()
    return "".join(chunks)
Пример #14
0
 def get_link(self):
     """Return ``self.el.uri``, converted with ``to_unicode`` when ``self.encoding`` is set."""
     link = self.el.uri
     if not self.encoding:
         return link
     return to_unicode(self.el.uri, self.encoding)
Пример #15
0
 def get_link(self):
     """Return the element's ``LINK_ATTR_NAME`` attribute value.

     The value is converted with ``to_unicode`` when ``self.encoding`` is set.
     """
     link = self.el.get(self.LINK_ATTR_NAME)
     if not self.encoding:
         return link
     return to_unicode(link, self.encoding)
Пример #16
0
 def update_tag(self):
     """Write the concatenated stylesheet text into ``self.element``.

     Does nothing when ``self.element`` is ``None``. Empty stylesheet text is
     stored as ``''``.
     """
     if self.element is None:
         return
     self._concatenate_sheets()
     raw_css = self._cached_stylesheet.cssText
     text = ''
     if raw_css:
         text = to_unicode(raw_css, 'utf-8') or ''
     self.element.text = text
Пример #17
0
    def getView(self, document, sheet, media='all', name=None, styleCallback=None):
        """
        Compute, per matching element, the style declaration that results
        from applying the style rules of ``sheet`` to ``document``.

        document
            a DOM document, currently an lxml HTML document
        sheet
            a CSS StyleSheet object, currently cssutils sheet
        media: optional
            TODO: view for which media it should be
        name: optional
            TODO: names of sheets only
        styleCallback: optional
            should return css.CSSStyleDeclaration of inline styles, for html
            a style declaration for ``element@style``. Gets one parameter
            ``element`` which is the relevant DOMElement

        Rules whose selector cannot be compiled by ``CSSSelector``
        (``ExpressionError`` / ``NotImplementedError``) are not merged;
        they are collected and inserted as a ``<style>`` element at the
        start of ``<body>`` (or of ``document`` when no ``<body>`` is found).

        returns style view
            a dict of {DOMElement: css.CSSStyleDeclaration} for html
        """

        styleCallback = styleCallback or self.styleattribute

        _unmergable_rules = CSSStyleSheet()

        view = {}
        specificities = {}  # needed temporarily

        # TODO: filter rules simpler?, add @media
        rules = (rule for rule in sheet if rule.type == rule.STYLE_RULE)
        for rule in rules:
            for selector in rule.selectorList:
                self.log(0, 'SELECTOR', selector.selectorText)
                # TODO: make this a callback to be able to use other stuff than lxml
                try:
                    cssselector = CSSSelector(selector.selectorText)
                except (ExpressionError, NotImplementedError):
                    # Selector cannot be evaluated here; keep the rule so it
                    # can be emitted in a <style> element below.
                    _unmergable_rules.add(CSSStyleRule(selectorText=selector.selectorText,
                                                       style=rule.style))
                    continue

                matching = cssselector.evaluate(document)

                for element in matching:

                    if element.tag in self.NONVISUAL_TAGS:
                        continue

                    # add styles for all matching DOM elements
                    self.log(1, 'ELEMENT', id(element), element.text)

                    if element not in view:
                        # add initial empty style declaration
                        view[element] = CSSStyleDeclaration()
                        specificities[element] = {}

                        # and add inline @style if present
                        inlinestyle = styleCallback(element)
                        if inlinestyle:
                            for p in inlinestyle:
                                # set inline style specificity
                                view[element].setProperty(p)
                                specificities[element][p.name] = (1, 0, 0, 0)

                    for p in rule.style:
                        # update style declaration
                        if p not in view[element]:
                            # setProperty needs a new Property object and
                            # MUST NOT reuse the existing Property
                            # which would be the same for all elements!
                            # see Issue #23
                            view[element].setProperty(p.name, p.value, p.priority)
                            specificities[element][p.name] = selector.specificity
                            self.log(2, view[element].getProperty('color'))

                        else:
                            self.log(2, view[element].getProperty('color'))
                            sameprio = (p.priority ==
                                        view[element].getPropertyPriority(p.name))
                            if (not sameprio and bool(p.priority)) or (
                                    sameprio and selector.specificity >=
                                    specificities[element][p.name]):
                                # later, more specific or higher prio
                                view[element].setProperty(p.name, p.value, p.priority)

        _unmergable_css = _unmergable_rules.cssText
        if _unmergable_css:
            style_element = etree.Element('style')
            style_element.text = to_unicode(_unmergable_css, 'utf-8')
            # Compare against None explicitly: an lxml element without
            # children is falsy, so ``find('body') or document`` would skip
            # an existing but empty <body>.
            body = document.find('body')
            if body is None:
                body = document
            body.insert(0, style_element)  # add <style> right into body

        return view
Пример #18
0
 def update(self):
     """Write the current style declaration text into the element's ``style`` attribute.

     A ``str`` value is converted with ``to_unicode`` first.
     """
     css = self.style.cssText
     value = to_unicode(css, 'utf-8') if isinstance(css, str) else css
     self.el.set('style', value)