Python find_charset示例，leap.mail.utils.find_charset Python示例

示例#1

0

显示文件

    def getHeaders(self, negate, *names):
        """
        Retrieve a group of message headers.

        When no headers are available a warning is logged and a mapping
        holding only an empty ``content-type`` entry is returned.

        :param negate: If True, indicates that the headers listed in names
                       should be omitted from the return value, rather
                       than included.
        :type negate: bool

        :param names: The names of the headers to retrieve or omit.
                      They are matched case-insensitively.
        :type names: tuple of str

        :return: A mapping of header field names to header field values
        :rtype: dict
        """
        # TODO split in smaller methods
        # XXX refactor together with MessagePart method

        headers = self._get_headers()
        if not headers:
            logger.warning("No headers found")
            return {str('content-type'): str('')}

        # Materialise the upper-cased names once, as a set: the membership
        # test below runs for every header, and a lazy ``map`` object would
        # be exhausted after the first lookup.
        wanted = frozenset(name.upper() for name in names)
        exclude_listed = bool(negate)

        if isinstance(headers, list):
            headers = dict(headers)

        # default to most likely standard
        charset = find_charset(headers, "utf-8")
        selected = {}
        for key, value in headers.items():
            # twisted imap server expects *some* headers to be lowercase
            # We could use a CaseInsensitiveDict here...
            if key.lower() == "content-type":
                key = key.lower()

            if not isinstance(key, str):
                key = key.encode(charset, 'replace')
            if not isinstance(value, str):
                value = value.encode(charset, 'replace')

            if value.endswith(";"):
                # drop a dangling semicolon left at the end of the value
                value = value[:-1]

            # Keep listed headers when not negating, unlisted ones when
            # negating.
            is_listed = key.upper() in wanted
            if is_listed != exclude_listed:
                selected[key] = value
        return selected

示例#2

0

显示文件

文件： messageparts.py 项目： pmaia/leap_mail

    def getBodyFile(self):
        """
        Retrieve a file object containing only the body of this message.

        The payload is looked up through the ``phash`` found in the part
        map (the part stored under key ``'1'`` for multipart messages).
        A unicode payload is encoded with the charset found in its
        content-type, or with the one returned by ``_get_charset`` when
        the content-type does not provide one.

        :return: file-like object opened for reading
        :rtype: StringIO
        """
        fd = StringIO.StringIO()
        payload = ""
        phash = None

        if empty(self._pmap):
            logger.warning("Message with no part_map!")
        else:
            if self._pmap.get('multi'):
                first_part = self._pmap.get('part_map').get('1', None)
                if not empty(first_part):
                    phash = first_part['phash']
            else:
                phash = self._pmap.get("phash", None)

            if phash is None:
                logger.warning("Could not find phash for this subpart!")
            else:
                payload = self._get_payload_from_document_memoized(phash)
                if empty(payload):
                    # nothing memoized: ask the document directly
                    payload = self._get_payload_from_document(phash)

        if payload:
            content_type = self._get_ctype_from_document(phash)
            charset = find_charset(content_type)
            if charset is None:
                charset = self._get_charset(payload)

            if isinstance(payload, unicode):
                try:
                    payload = payload.encode(charset)
                except UnicodeError as exc:
                    logger.error(
                        "Unicode error, using 'replace'. {0!r}".format(exc))
                    payload = payload.encode(charset, 'replace')
                except (LookupError, TypeError) as exc:
                    # The charset is not a codec name Python knows (or is
                    # not a string at all). Serve the body as utf-8
                    # instead of failing the whole fetch.
                    logger.error(
                        "Unusable charset {0!r}, falling back to utf-8. "
                        "{1!r}".format(charset, exc))
                    payload = payload.encode("utf-8", 'replace')

        fd.write(payload)
        fd.seek(0)
        return fd

示例#3

0

显示文件

文件： messages.py 项目： Moscarda/leap_mail

def _format_headers(headers, negate, *names):
    """
    Select a group of headers and return them in a case-insensitive mapping.

    Keys and values that are not ``str`` are encoded with the charset
    obtained from ``find_charset(headers, "utf-8")``, and a trailing ``;``
    is stripped from values.

    :param headers: the message headers, either a mapping or a list of
                    (name, value) pairs.
    :type headers: dict or list
    :param negate: if True, the headers listed in names are omitted from
                   the result instead of being the only ones included.
    :type negate: bool
    :param names: the header names to include or omit; they are matched
                  case-insensitively.
    :type names: tuple of str
    :return: the selected headers. When ``headers`` is empty, a plain dict
             with only an empty ``content-type`` entry is returned.
    :rtype: CaseInsensitiveDict or dict
    """
    # current server impl. expects content-type to be present, so if for
    # some reason we do not have headers, we have to return at least that
    # one
    if not headers:
        logger.warning("No headers found")
        return {str('content-type'): str('')}

    # Materialise the upper-cased names once, as a set: the membership test
    # below runs for every header, and a lazy ``map`` object would be
    # exhausted after the first lookup.
    wanted = frozenset(name.upper() for name in names)
    exclude_listed = bool(negate)

    if isinstance(headers, list):
        headers = dict(headers)

    # default to most likely standard
    charset = find_charset(headers, "utf-8")

    # We will return a copy of the headers dictionary that
    # will allow case-insensitive lookups. In some parts of the twisted imap
    # server code the keys are expected to be in lower case, and in this way
    # we avoid having to convert them.

    _headers = CaseInsensitiveDict()
    for key, value in headers.items():
        if not isinstance(key, str):
            key = key.encode(charset, 'replace')
        if not isinstance(value, str):
            value = value.encode(charset, 'replace')

        if value.endswith(";"):
            # drop a dangling semicolon left at the end of the value
            value = value[:-1]

        # Keep listed headers when not negating, unlisted ones when negating.
        is_listed = key.upper() in wanted
        if is_listed != exclude_listed:
            _headers[key] = value

    return _headers

示例#4

0

显示文件

    def getBodyFile(self):
        """
        Retrieve a file object containing only the body of this message.

        The raw body is read from the body document (``self.bdoc``). A
        unicode body is encoded with the charset found in the stored
        content-type, or with the one returned by ``_get_charset`` when the
        content-type does not provide one. An empty file is returned when
        there is no body document or it has no content.

        :return: file-like object opened for reading
        :rtype: StringIO
        """
        def write_fd(body):
            fd.write(body)
            fd.seek(0)
            return fd

        # TODO refactor with getBodyFile in MessagePart

        fd = StringIO.StringIO()

        if self.bdoc is None:
            logger.warning("No BDOC found for message.")
            return write_fd("")

        bdoc_content = self.bdoc.content
        if empty(bdoc_content):
            logger.warning("No BDOC content found for message!!!")
            return write_fd("")

        body = bdoc_content.get(self.RAW_KEY, "")
        content_type = bdoc_content.get('content-type', "")
        charset = find_charset(content_type)
        if charset is None:
            charset = self._get_charset(body)

        if isinstance(body, unicode):
            try:
                body = body.encode(charset)
            except UnicodeError as exc:
                logger.error(
                    "Unicode error, using 'replace'. {0!r}".format(exc))
                logger.debug("Attempted to encode with: %s" % charset)
                body = body.encode(charset, 'replace')
            except (LookupError, TypeError) as exc:
                # The charset is not a codec name Python knows (or is not a
                # string at all). This used to be hidden by a ``return``
                # inside ``finally``, which discarded the exception and
                # wrote the un-encoded unicode body. Handle it explicitly
                # and serve utf-8 instead.
                logger.error(
                    "Unusable charset {0!r}, falling back to utf-8. "
                    "{1!r}".format(charset, exc))
                body = body.encode("utf-8", 'replace')

        # We are still returning funky characters from here.
        return write_fd(body)

示例#5

0

显示文件

文件： messages.py 项目： jnews0n/leap_mail

def _format_headers(headers, negate, *names):
    """
    Select a group of headers and return them in a case-insensitive mapping.

    Keys and values that are not ``str`` are encoded with the charset
    obtained from ``find_charset(headers, "utf-8")``, and a trailing ``;``
    is stripped from values.

    :param headers: the message headers, either a mapping or a list of
                    (name, value) pairs.
    :type headers: dict or list
    :param negate: if True, the headers listed in names are omitted from
                   the result instead of being the only ones included.
    :type negate: bool
    :param names: the header names to include or omit; they are matched
                  case-insensitively.
    :type names: tuple of str
    :return: the selected headers. When ``headers`` is empty, a plain dict
             with only an empty ``content-type`` entry is returned.
    :rtype: CaseInsensitiveDict or dict
    """
    # current server impl. expects content-type to be present, so if for
    # some reason we do not have headers, we have to return at least that
    # one
    if not headers:
        logger.warning("No headers found")
        return {str('content-type'): str('')}

    # Upper-case the requested names once and keep them in a set, so each
    # per-header membership test is a hash lookup. A lazy ``map`` object
    # would additionally be consumed by the very first ``in`` test.
    requested = frozenset(name.upper() for name in names)
    omit_requested = bool(negate)

    if isinstance(headers, list):
        headers = dict(headers)

    # default to most likely standard
    charset = find_charset(headers, "utf-8")

    # We will return a copy of the headers dictionary that
    # will allow case-insensitive lookups. In some parts of the twisted imap
    # server code the keys are expected to be in lower case, and in this way
    # we avoid having to convert them.

    _headers = CaseInsensitiveDict()
    for key, value in headers.items():
        if not isinstance(key, str):
            key = key.encode(charset, 'replace')
        if not isinstance(value, str):
            value = value.encode(charset, 'replace')

        if value.endswith(";"):
            # drop a dangling semicolon left at the end of the value
            value = value[:-1]

        # A header is kept when its "requested" status differs from the
        # negate flag: requested and not negated, or unrequested and negated.
        if (key.upper() in requested) != omit_requested:
            _headers[key] = value

    return _headers

示例#6

0

显示文件

文件： mail.py 项目： Moscarda/leap_mail

def _encode_payload(payload, ctype=""):
    """
    Properly encode a payload (which can be string or unicode) as a string.

    A unicode payload is encoded with the charset found in ``ctype``,
    defaulting to utf-8. Characters that cannot be represented are replaced,
    and a charset name that Python does not know falls back to utf-8.
    Payloads that are not unicode are returned untouched.

    :param payload: the payload to encode. currently soledad returns unicode
                    strings.
    :type payload: basestring
    :param ctype: optional, the content of the content-type header for this
                  payload.
    :type ctype: str
    :rtype: str
    """
    # TODO Related, it's proposed that we're able to pass
    # the encoding to the soledad documents. Better to store the charset there?
    # FIXME -----------------------------------------------
    # this need a dedicated test-suite
    charset = find_charset(ctype)

    # XXX get from mail headers if not multipart!
    # Beware also that we should pass the proper encoding to
    # soledad when it's creating the documents.
    # if not charset:
    # charset = get_email_charset(payload)
    # -----------------------------------------------------

    if not charset:
        charset = "utf-8"

    if not isinstance(payload, unicode):
        return payload

    try:
        return payload.encode(charset)
    except UnicodeError as exc:
        logger.error(
            "Unicode error, using 'replace'. {0!r}".format(exc))
        return payload.encode(charset, 'replace')
    except LookupError as exc:
        # The header names a charset Python has no codec for; do not let a
        # bogus content-type abort the whole operation.
        logger.error(
            "Unknown charset {0!r}, falling back to utf-8. "
            "{1!r}".format(charset, exc))
        return payload.encode("utf-8", 'replace')

示例#7

0

显示文件

def _encode_payload(payload, ctype=""):
    """
    Properly encode a payload (which can be string or unicode) as a string.

    A unicode payload is encoded with the charset found in ``ctype``,
    defaulting to utf-8. Characters that cannot be represented are replaced,
    and a charset name that Python does not know falls back to utf-8.
    Payloads that are not unicode are returned untouched.

    :param payload: the payload to encode. currently soledad returns unicode
                    strings.
    :type payload: basestring
    :param ctype: optional, the content of the content-type header for this
                  payload.
    :type ctype: str
    :rtype: str
    """
    # TODO Related, it's proposed that we're able to pass
    # the encoding to the soledad documents. Better to store the charset there?
    # FIXME -----------------------------------------------
    # this need a dedicated test-suite
    charset = find_charset(ctype)

    # XXX get from mail headers if not multipart!
    # Beware also that we should pass the proper encoding to
    # soledad when it's creating the documents.
    # if not charset:
    # charset = get_email_charset(payload)
    # -----------------------------------------------------

    if not charset:
        charset = "utf-8"

    if isinstance(payload, unicode):
        try:
            payload = payload.encode(charset)
        except UnicodeError as exc:
            logger.error(
                "Unicode error, using 'replace'. {0!r}".format(exc))
            payload = payload.encode(charset, 'replace')
        except LookupError as exc:
            # The header names a charset Python has no codec for; do not
            # let a bogus content-type abort the whole operation.
            logger.error(
                "Unknown charset {0!r}, falling back to utf-8. "
                "{1!r}".format(charset, exc))
            payload = payload.encode("utf-8", 'replace')
    return payload