Exemplo n.º 1
0
def decode_mail_header(value, default_charset='us-ascii'):
    """
    Turn a mail header value into one text string.

    The value is split by ``decode_header`` into ``(text, charset)`` pairs.
    Each pair is decoded with its own charset (or ``default_charset`` when
    the pair carries none), and the decoded pieces are concatenated.

    :param value: the header value to decode.
    :param default_charset: charset used when a fragment names none, when
        the named charset is unknown, and when the header cannot be parsed.
    :return: the decoded header text.
    """
    try:
        fragments = decode_header(value)
    except email.errors.HeaderParseError:
        # Unparseable header: push the whole value through the default
        # charset with replacement instead of failing.
        reencoded = str_encode(value, default_charset, 'replace')
        return str_decode(reencoded, default_charset)

    decoded_parts = []
    for position, (raw, charset) in enumerate(fragments):
        try:
            # The log preview falls back to utf-8, not default_charset.
            preview = str_decode(raw, charset or 'utf-8', 'replace')
            logger.debug(
                "Mail header no. {index}: {data} encoding {charset}".format(
                    index=position, data=preview, charset=charset))
            piece = str_decode(raw, charset or default_charset, 'replace')
        except LookupError:
            # Unknown charset name: decode with the default instead.
            piece = str_decode(raw, default_charset, 'replace')
        decoded_parts.append(piece)

    return ''.join(decoded_parts)
Exemplo n.º 2
0
Arquivo: parser.py Projeto: aino/imbox
def decode_mail_header(value, default_charset='us-ascii'):
    """
    Decode a header value into a unicode string.

    ``decode_header`` yields ``(text, charset)`` pairs; every pair is decoded
    and the results are joined without a separator.

    :param value: the header value to decode.
    :param default_charset: charset applied when a pair has no charset, when
        its charset is unknown, or when the header cannot be parsed at all.
    :return: the decoded header text.
    """
    def _to_text(chunk, charset):
        # Decode one fragment, retrying with the default charset when the
        # declared one is not a known codec.
        try:
            return str_decode(chunk, charset or default_charset, 'replace')
        except LookupError:
            return str_decode(chunk, default_charset, 'replace')

    try:
        pairs = decode_header(value)
    except email.errors.HeaderParseError:
        # Malformed header: normalise the raw value through the default
        # charset rather than raising.
        return str_decode(str_encode(value, default_charset, 'replace'), default_charset)

    return ''.join(_to_text(chunk, charset) for chunk, charset in pairs)
Exemplo n.º 3
0
def decode_param(param):
    """
    Split a ``name=value`` parameter and decode encoded words in the value.

    The value is examined line by line for ``=?charset?Q?...?=`` or
    ``=?charset?B?...?=`` words.  Each matching line is decoded
    (quoted-printable for ``Q``, base64 for ``B``) and converted with
    ``str_encode`` using the charset named in the word.  If at least one line
    matched, the value becomes the concatenation of the decoded lines and
    lines without an encoded word are dropped.  If no line matched, the value
    is returned untouched.

    :param param: a string of the form ``name=value``; only the first ``=``
        separates the name from the value.
    :return: a ``(name, value)`` tuple.
    :raises ValueError: if ``param`` contains no ``=``.
    """
    name, v = param.split('=', 1)

    # base64.decodestring was removed in Python 3.9; decodebytes replaces it.
    # The fallback keeps interpreters that only have the old name working.
    b64decode = getattr(base64, 'decodebytes', None) or base64.decodestring

    value_results = []
    for value in v.split('\n'):
        match = re.search(r'=\?((?:\w|-)+)\?(Q|B)\?(.+)\?=', value)
        if not match:
            continue
        encoding, type_, code = match.groups()
        if type_ == 'Q':
            value = quopri.decodestring(code)
        elif type_ == 'B':
            # The base64 decoder needs bytes; the payload is ASCII by design.
            if not isinstance(code, bytes):
                code = code.encode('ascii')
            value = b64decode(code)
        value = str_encode(value, encoding)
        value_results.append(value)

    if value_results:
        v = ''.join(value_results)
    return name, v
Exemplo n.º 4
0
def decode_param(param):
    """
    Parse a ``name=value`` parameter, decoding encoded words in the value.

    Every line of the value is searched for an encoded word of the form
    ``=?charset?Q?payload?=`` or ``=?charset?B?payload?=``.  The payload is
    decoded as quoted-printable (``Q``) or base64 (``B``) and then passed to
    ``str_encode`` together with the charset.  When any line contained an
    encoded word, the value is replaced by the joined decoded payloads;
    otherwise the original value is returned as is.

    :param param: text of the form ``name=value`` (split on the first ``=``).
    :return: a ``(name, value)`` tuple.
    :raises ValueError: if ``param`` has no ``=`` separator.
    """
    name, v = param.split('=', 1)

    try:
        decode_base64 = base64.decodebytes
    except AttributeError:
        # Interpreters without decodebytes still provide the old alias,
        # which Python 3.9 removed.
        decode_base64 = base64.decodestring

    value_results = []
    for value in v.split('\n'):
        match = re.search(r'=\?((?:\w|-)+)\?(Q|B)\?(.+)\?=', value)
        if match is None:
            continue
        encoding, type_, code = match.groups()
        if type_ == 'Q':
            value = quopri.decodestring(code)
        elif type_ == 'B':
            # decodebytes rejects text input, so hand it ASCII bytes.
            if not isinstance(code, bytes):
                code = code.encode('ascii')
            value = decode_base64(code)
        value_results.append(str_encode(value, encoding))

    if value_results:
        v = ''.join(value_results)
    return name, v
Exemplo n.º 5
0
def decode_param(param):
    """
    Split ``name=value`` and decode any encoded words found in the value.

    Each line of the value is checked for ``=?charset?Q?...?=`` or
    ``=?charset?B?...?=``.  Matching lines are decoded (quoted-printable or
    base64) and converted with ``str_encode`` using the named charset.  If
    any line matched, the value becomes the joined decoded lines; otherwise
    it is left unchanged.  The final pair is logged at debug level.

    :param param: text of the form ``name=value``.
    :return: a ``(name, value)`` tuple.
    """
    name, v = param.split('=', 1)
    encoded_word = re.compile(r'=\?((?:\w|-)+)\?([QB])\?(.+)\?=')

    decoded_chunks = []
    for line in v.split('\n'):
        found = encoded_word.search(line)
        if found is None:
            continue
        encoding, type_, code = found.groups()
        if type_ == 'Q':
            line = quopri.decodestring(code)
        elif type_ == 'B':
            line = base64.decodebytes(code.encode())
        decoded_chunks.append(str_encode(line, encoding))

    if decoded_chunks:
        v = ''.join(decoded_chunks)

    logger.debug("Decoded parameter {} - {}".format(name, v))
    return name, v
Exemplo n.º 6
0
def decode_param(param):
    """
    Decode a ``name=value`` parameter whose value may hold encoded words.

    The value is processed one line at a time.  A line containing
    ``=?charset?Q?payload?=`` or ``=?charset?B?payload?=`` has its payload
    decoded and passed to ``str_encode`` with the charset.  When at least one
    line was decoded, the decoded pieces are joined to form the new value;
    otherwise the value stays as it was.  The outcome is logged at debug
    level before returning.

    :param param: text of the form ``name=value``.
    :return: a ``(name, value)`` tuple.
    """
    # One decoder per transfer type; the regex below only admits Q or B.
    payload_decoders = {
        'Q': quopri.decodestring,
        'B': lambda text: base64.decodebytes(text.encode()),
    }

    name, v = param.split('=', 1)
    pieces = []
    for segment in v.split('\n'):
        hit = re.search(r'=\?((?:\w|-)+)\?([QB])\?(.+)\?=', segment)
        if hit:
            encoding, type_, code = hit.groups()
            raw = payload_decoders[type_](code)
            pieces.append(str_encode(raw, encoding))

    if pieces:
        v = ''.join(pieces)
    logger.debug("Decoded parameter {} - {}".format(name, v))
    return name, v
Exemplo n.º 7
0
def decode_mail_header(value, default_charset='us-ascii'):
    """
    Decode a header value into a unicode string.

    The header is split with ``decode_header`` into ``(text, charset)``
    fragments, each fragment is decoded, and the decoded fragments are
    concatenated in order.

    :param value: the header value to decode.
    :param default_charset: used for fragments without a charset, for
        fragments whose charset is unknown, and for unparseable headers.
    :return: the decoded header text.
    """
    try:
        fragments = decode_header(value)
    except email.errors.HeaderParseError:
        # Could not split the header; fall back to a lossy round trip through
        # the default charset.
        fallback = str_encode(value, default_charset, 'replace')
        return str_decode(fallback, default_charset)

    decoded = []
    for text, charset in fragments:
        chosen = charset if charset else default_charset
        try:
            decoded.append(str_decode(text, chosen, 'replace'))
        except LookupError:
            # The declared charset is not a known codec: use the default.
            decoded.append(str_decode(text, default_charset, 'replace'))
    return ''.join(decoded)
Exemplo n.º 8
0
def decode_param(param):
    """
    Split a ``name=value`` parameter and decode encoded words in the value.

    Each line of the value is searched for ``=?charset?Q?payload?=`` or
    ``=?charset?B?payload?=``.  The payload is converted to ASCII bytes when
    possible, decoded as quoted-printable (``Q``) or base64 (``B``), and
    passed to ``str_encode`` with the charset.  If any line matched, the
    value becomes the joined decoded lines; otherwise it is returned
    unchanged.

    :param param: text of the form ``name=value`` (split on the first ``=``).
    :return: a ``(name, value)`` tuple.
    :raises ValueError: if ``param`` contains no ``=``.
    """
    name, v = param.split('=', 1)
    value_results = []
    for value in v.split('\n'):
        match = re.search(r'=\?((?:\w|-)+)\?(Q|B)\?(.+)\?=', value)
        if not match:
            continue
        encoding, type_, code = match.groups()
        if isinstance(code, str):
            try:
                code = code.encode('ascii')
            except UnicodeError:
                # Best effort: a payload with non-ASCII characters is handed
                # to the decoder as is.  Only encoding failures are ignored,
                # so unrelated errors are no longer hidden.
                pass
        if type_ == 'Q':
            value = quopri.decodestring(code)
        elif type_ == 'B':
            value = base64.decodebytes(code)
        value = str_encode(value, encoding)
        value_results.append(value)

    if value_results:
        v = ''.join(value_results)
    return name, v
Exemplo n.º 9
0
def decode_mail_header(value, default_charset='us-ascii'):
    """
    Decode a header value into a unicode string.

    The header is split with ``decode_header`` into ``(text, charset)``
    fragments.  Each fragment is decoded with its own charset, or with
    ``default_charset`` when it has none, and the results are concatenated.

    :param value: the header value to decode.
    :param default_charset: used for fragments without a charset, for
        fragments whose charset is unknown, and for unparseable headers.
    :return: the decoded header text.
    """
    try:
        headers = decode_header(value)
    except email.errors.HeaderParseError:
        # Unparseable header: lossy round trip through the default charset.
        return str_decode(str_encode(value, default_charset, 'replace'), default_charset)
    else:
        for index, (text, charset) in enumerate(headers):
            try:
                # The debug preview decodes with the fragment's charset too,
                # so it must sit inside the try: an unknown charset would
                # otherwise raise LookupError before the fallback could run.
                logger.debug("Mail header no. {index}: {data} encoding {charset}".format(
                    index=index,
                    data=str_decode(text, charset or 'utf-8', 'replace'),
                    charset=charset))
                headers[index] = str_decode(text, charset or default_charset,
                                            'replace')
            except LookupError:
                # if the charset is unknown, force default
                headers[index] = str_decode(text, default_charset, 'replace')

        return ''.join(headers)
Exemplo n.º 10
0
def decode_param(param):
    """
    Split ``name=value`` and decode any encoded words found in the value.

    Each line of the value is searched for ``=?charset?Q?payload?=`` or
    ``=?charset?B?payload?=``.  The payload is decoded (quoted-printable or
    base64) and converted with ``str_encode`` using the charset, ignoring
    characters that cannot be converted.  If any line matched, the value
    becomes the joined decoded lines; otherwise it is left unchanged.  The
    final pair is logged at debug level.

    :param param: text of the form ``name=value``.
    :return: a ``(name, value)`` tuple.
    """
    name, v = param.split('=', 1)
    pieces = []
    for line in v.split('\n'):
        hit = re.search(r'=\?((?:\w|-)+)\?([QB])\?(.+)\?=', line)
        if not hit:
            continue
        encoding, type_, code = hit.groups()
        if type_ == 'Q':
            # Swap non-breaking spaces (code point 160) for plain spaces
            # (code point 32) before quoted-printable decoding.
            code = code.replace('\xa0', ' ')
            decoded = quopri.decodestring(code)
        else:
            # The pattern only admits Q or B, so this is the base64 case.
            decoded = base64.decodebytes(code.encode())
        # 'ignore' avoids a UnicodeError on bytes the charset cannot handle.
        pieces.append(str_encode(decoded, encoding, 'ignore'))

    if pieces:
        v = ''.join(pieces)
    logger.debug("Decoded parameter {} - {}".format(name, v))
    return name, v
Exemplo n.º 11
0
def parse_email(raw_email):
    """
    Parse a raw e-mail message and return its fields wrapped in a ``Struct``.

    The result always carries ``raw_email``, ``body`` (a dict with ``plain``
    and ``html`` lists), ``attachments``, ``sent_from``, ``sent_to``, ``cc``,
    ``bcc`` and ``headers``.  ``subject``, ``date`` and ``message_id`` are
    added when the matching header is present.  ``parsed_date`` is added when
    a date header exists, and is ``None`` if that date cannot be parsed.

    :param raw_email: the message source; ``binary_type`` input is first
        converted with ``str_encode(raw_email, 'utf-8')``.
    :return: ``Struct(**parsed_email)``.
    """
    if isinstance(raw_email, binary_type):
        raw_email = str_encode(raw_email, 'utf-8')
    try:
        email_message = email.message_from_string(raw_email)
    except UnicodeEncodeError:
        # Retry with an explicitly utf-8 encoded copy of the source.
        email_message = email.message_from_string(raw_email.encode('utf-8'))
    maintype = email_message.get_content_maintype()
    parsed_email = {}

    parsed_email['raw_email'] = raw_email

    body = {
        "plain": [],
        "html": []
    }
    attachments = []

    if maintype in ('multipart', 'image'):
        logger.debug("Multipart message. Will process parts.")
        for part in email_message.walk():
            content_type = part.get_content_type()
            part_maintype = part.get_content_maintype()
            content_disposition = part.get('Content-Disposition', None)
            # Parts with a disposition, and all non-text parts, are taken as
            # the transfer-decoded payload; other text parts go through
            # decode_content.
            if content_disposition or not part_maintype == "text":
                content = part.get_payload(decode=True)
            else:
                content = decode_content(part)

            # A Content-Disposition value may carry parameters after the type
            # (e.g. 'inline; filename="a.txt"'), so match on the prefix
            # rather than requiring the whole value to equal "inline".
            is_inline = content_disposition is None \
                or content_disposition.startswith("inline")
            if content_type == "text/plain" and is_inline:
                body['plain'].append(content)
            elif content_type == "text/html" and is_inline:
                body['html'].append(content)
            elif content_disposition:
                attachment = parse_attachment(part)
                if attachment:
                    attachments.append(attachment)

    elif maintype == 'text':
        payload = decode_content(email_message)
        body['plain'].append(payload)

    parsed_email['attachments'] = attachments

    parsed_email['body'] = body
    email_dict = dict(email_message.items())

    parsed_email['sent_from'] = get_mail_addresses(email_message, 'from')
    parsed_email['sent_to'] = get_mail_addresses(email_message, 'to')
    parsed_email['cc'] = get_mail_addresses(email_message, 'cc')
    parsed_email['bcc'] = get_mail_addresses(email_message, 'bcc')

    # Headers copied to top-level keys (decoded, with '-' turned into '_').
    value_headers_keys = ['subject', 'date', 'message-id']
    # Headers collected verbatim as {'Name': ..., 'Value': ...} entries.
    key_value_header_keys = ['received-spf',
                             'mime-version',
                             'x-spam-status',
                             'x-spam-score',
                             'content-type']

    parsed_email['headers'] = []
    for key, value in email_dict.items():
        lowered = key.lower()

        if lowered in value_headers_keys:
            valid_key_name = lowered.replace('-', '_')
            parsed_email[valid_key_name] = decode_mail_header(value)

        if lowered in key_value_header_keys:
            parsed_email['headers'].append({'Name': key,
                                            'Value': value})

    if parsed_email.get('date'):
        # parsedate returns None for an unparseable date; mktime reads the
        # tuple as local time.
        timetuple = email.utils.parsedate(parsed_email['date'])
        parsed_date = datetime.fromtimestamp(time.mktime(timetuple)) \
            if timetuple else None
        parsed_email['parsed_date'] = parsed_date

    logger.info("Downloaded and parsed mail '{}' with {} attachments".format(
        parsed_email.get('subject'), len(parsed_email.get('attachments'))))
    return Struct(**parsed_email)
Exemplo n.º 12
0
def parse_email(uid, raw_email, policy=None):
    """
    Parse a raw e-mail message and return its fields wrapped in a ``Struct``.

    The result carries ``uid``, ``body`` (a dict with ``plain`` and ``html``
    lists), ``attachments``, ``sent_from``, ``sent_to``, ``cc``, ``bcc`` and
    ``headers``.  ``subject``, ``date`` and ``message_id`` are added when the
    matching header is present.  ``parsed_date`` is added when a date header
    exists: a ``"%Y-%m-%d %H:%M:%S"`` string, or ``None`` if the date cannot
    be parsed.  The raw message text is not included in the result.

    Inline ``image/jpeg`` parts that declare a ``name`` are written to
    ``illustrate_path`` as a side effect.

    :param uid: message uid as bytes; stored decoded under ``uid``.
    :param raw_email: the message source; bytes input is first converted
        with ``str_encode(raw_email, 'utf-8', errors='ignore')``.
    :param policy: optional policy passed on to
        ``email.message_from_string``.
    :return: ``Struct(**parsed_email)``.
    """
    parsed_email = {'uid': bytes.decode(uid)}
    if isinstance(raw_email, bytes):
        raw_email = str_encode(raw_email, 'utf-8', errors='ignore')
    email_parse_kwargs = {} if policy is None else dict(policy=policy)

    try:
        email_message = email.message_from_string(raw_email,
                                                  **email_parse_kwargs)
    except UnicodeEncodeError:
        # Retry with an explicitly utf-8 encoded copy of the source.
        email_message = email.message_from_string(raw_email.encode('utf-8'),
                                                  **email_parse_kwargs)
    maintype = email_message.get_content_maintype()

    body = {"plain": [], "html": []}
    attachments = []

    if maintype in ('multipart', 'image'):
        for part in email_message.walk():
            content_type = part.get_content_type()
            part_maintype = part.get_content_maintype()
            content_disposition = part.get('Content-Disposition', None)
            # Parts with a disposition, and all non-text parts, are taken as
            # the transfer-decoded payload; other text parts go through
            # decode_content.
            if content_disposition or not part_maintype == "text":
                content = part.get_payload(decode=True)
            else:
                content = decode_content(part)

            is_inline = content_disposition is None \
                or content_disposition.startswith("inline")
            if content_type == "text/plain" and is_inline:
                body['plain'].append(re.sub('\\r|\\n', '', str(content)))
            elif content_type == "text/html" and is_inline:
                body['html'].append(re.sub('\\r|\\n', '', str(content)))
            elif content_type == "image/jpeg" and is_inline:
                # Illustration embedded in the message body.
                filename = decode_mail_header(str(part.get_param('name')))
                # str(None) == 'None' marks a part without a name parameter.
                if not op.eq(filename, 'None'):
                    # Drop the last four characters (presumably the ".jpg"
                    # extension - confirm against callers).  The name comes
                    # from the message itself, so keep only its final path
                    # component to stay inside illustrate_path.
                    filename = os.path.basename(filename[:-4])
                    # Nothing usable left (name too short, or a directory
                    # reference): skip the file instead of failing on open().
                    if filename not in ('', '.', '..'):
                        os.makedirs(illustrate_path, exist_ok=True)
                        target = os.path.join(illustrate_path, filename)
                        with open(target, "wb") as fw:
                            fw.write(content)
            elif content_disposition:
                # Anything else with a disposition is an attachment.
                attachment = parse_attachment(part)
                if attachment:
                    attachments.append(attachment)

    elif maintype == 'text':
        payload = decode_content(email_message)
        body['plain'].append(payload)

    parsed_email['attachments'] = attachments

    parsed_email['body'] = body
    email_dict = dict(email_message.items())

    parsed_email['sent_from'] = get_mail_addresses(email_message, 'from')
    parsed_email['sent_to'] = get_mail_addresses(email_message, 'to')
    parsed_email['cc'] = get_mail_addresses(email_message, 'cc')
    parsed_email['bcc'] = get_mail_addresses(email_message, 'bcc')

    # Headers copied to top-level keys (decoded, with '-' turned into '_').
    value_headers_keys = ['subject', 'date', 'message-id']
    # Headers collected verbatim as {'Name': ..., 'Value': ...} entries.
    key_value_header_keys = [
        'received-spf', 'mime-version', 'x-spam-status', 'x-spam-score',
        'content-type'
    ]

    parsed_email['headers'] = []
    for key, value in email_dict.items():
        lowered = key.lower()

        if lowered in value_headers_keys:
            valid_key_name = lowered.replace('-', '_')
            parsed_email[valid_key_name] = decode_mail_header(value)

        if lowered in key_value_header_keys:
            parsed_email['headers'].append({'Name': key, 'Value': value})

    if parsed_email.get('date'):
        # parsedate returns None for an unparseable date; store None then
        # instead of calling strftime on it.
        timetuple = email.utils.parsedate(parsed_email['date'])
        if timetuple:
            parsed_date = datetime.fromtimestamp(time.mktime(timetuple))
            parsed_email['parsed_date'] = parsed_date.strftime(
                "%Y-%m-%d %H:%M:%S")
        else:
            parsed_email['parsed_date'] = None

    return Struct(**parsed_email)