def format_addresses(addresses, header_name=None, charset=None):
    """
    Convert a list of addresses into a MIME-compliant header for a From, To, Cc,
    or any other I{address} related field.
    This mixes the use of email.utils.formataddr() and email.header.Header().

    @type addresses: list
    @param addresses: list of addresses, can be a mix of strings and tuples
        of the form C{[ 'address@domain', (u'Name', 'name@domain'), ...]}.
        If C{u'Name'} contains non us-ascii characters, it must be a
        unicode string or encoded using the I{charset} argument.
        A string entry is always treated as a bare address, whatever its
        length.
    @type header_name: string or None
    @keyword header_name: the name of the header. Its length is used to limit
        the length of the first line of the header according to the RFC's
        requirements. (not very important, but it's better to match the
        requirements when possible)
    @type charset: str
    @keyword charset: the encoding charset for non unicode I{name} and a
        B{hint} for encoding of unicode strings. In other words, if the
        I{name} of an address is a byte string containing non I{us-ascii}
        characters, then C{name.decode(charset)} must generate the expected
        result. If a unicode string is used instead, charset will be tried to
        encode the string; if it fails, I{utf-8} will be used.
        With B{Python 3.x} I{charset} is no longer a hint and an exception
        will be raised instead of using I{utf-8} as a fallback.
    @rtype: email.header.Header
    @return: the header object holding the formatted addresses separated by
        commas; call C{encode()} on it to get the I{Header} value.

    >>> print(format_addresses([('John', 'john@example.com') ], 'From', 'us-ascii').encode())
    John <john@example.com>
    >>> print(format_addresses([('l\\xe9o', 'leo@example.com') ], 'To', 'iso-8859-1').encode())
    =?iso-8859-1?q?l=E9o?= <leo@example.com>
    >>> print(format_addresses([('l\\xe9o', 'leo@example.com') ], 'To', 'us-ascii').encode())
    ... # don't work in 3.X because charset is more than a hint
    ... #doctest: +SKIP
    =?utf-8?q?l=C3=A9o?= <leo@example.com>
    >>> # because u'l\\xe9o' cannot be encoded into us-ascii, utf8 is used instead
    >>> print(format_addresses([('No\\xe9', 'noe@example.com'), ('M\\u0101ori', 'maori@example.com') ], 'Cc', 'iso-8859-1').encode())
    ... # don't work in 3.X because charset is more than a hint
    ... #doctest: +SKIP
    =?iso-8859-1?q?No=E9?= <noe@example.com> , =?utf-8?b?TcSBb3Jp?= <maori@example.com>
    >>> # 'No\\xe9' is already encoded into iso-8859-1, but u'M\\u0101ori' cannot be encoded into iso-8859-1
    >>> # then utf8 is used here
    >>> print(format_addresses(['a@example.com', ('John', 'john@example.com') ], 'From', 'us-ascii').encode())
    a@example.com , John <john@example.com>
    >>> print(format_addresses(['jo', 'a@example.com'], 'To', 'us-ascii').encode())
    jo , a@example.com
    """
    # bytes is str on Python 2; type(u'') is unicode on Python 2 and str on 3
    string_types = (bytes, type(u''))

    header = email.header.Header(charset=charset, header_name=header_name)
    for i, address in enumerate(addresses):
        if i != 0:
            # add separator between addresses
            header.append(',', charset='us-ascii')

        if isinstance(address, string_types):
            # a string is a bare address; it must not go through the unpacking
            # below, which would split a two-character string into name/addr
            header.append(address, charset='us-ascii')
            continue

        try:
            name, addr = address
        except ValueError:
            # not a (name, address) pair: append it unchanged, as for a bare
            # address
            header.append(address, charset='us-ascii')
            continue

        # check if address name is a unicode or byte string in "pure" us-ascii
        if utils.is_usascii(name):
            # name is plain us-ascii, formataddr can be used
            formatted_addr = email.utils.formataddr((name, addr))
            # us-ascii must be used and not default 'charset'
            header.append(formatted_addr, charset='us-ascii')
        else:
            # this is not a "pure" us-ascii string
            # Header will use "RFC2047" to encode the address name
            # if name is a byte string, charset will be used to decode it first
            header.append(name)
            # here us-ascii must be used and not default 'charset'
            header.append('<%s>' % (addr,), charset='us-ascii')

    return header
def format_addresses(addresses, header_name=None, charset=None):
    """
    Convert a list of addresses into a MIME-compliant header for a From, To, Cc,
    or any other I{address} related field.
    This mixes the use of email.utils.formataddr() and email.header.Header().

    @type addresses: list
    @param addresses: list of addresses, can be a mix of strings and tuples
        of the form C{[ 'address@domain', (u'Name', 'name@domain'), ...]}.
        If C{u'Name'} contains non us-ascii characters, it must be a
        unicode string or encoded using the I{charset} argument.
        A string entry is always treated as a bare address, whatever its
        length.
    @type header_name: string or None
    @keyword header_name: the name of the header. Its length is used to limit
        the length of the first line of the header according to the RFC's
        requirements. (not very important, but it's better to match the
        requirements when possible)
    @type charset: str
    @keyword charset: the encoding charset for non unicode I{name} and a
        B{hint} for encoding of unicode strings. In other words, if the
        I{name} of an address is a byte string containing non I{us-ascii}
        characters, then C{name.decode(charset)} must generate the expected
        result. If a unicode string is used instead, charset will be tried to
        encode the string; if it fails, I{utf-8} will be used.
        With B{Python 3.x} I{charset} is no longer a hint and an exception
        will be raised instead of using I{utf-8} as a fallback.
    @rtype: email.header.Header
    @return: the header object holding the formatted addresses separated by
        commas; call C{encode()} on it to get the I{Header} value.

    >>> print(format_addresses([('John', 'john@example.com') ], 'From', 'us-ascii').encode())
    John <john@example.com>
    >>> print(format_addresses([(u'l\\xe9o', 'leo@example.com') ], 'To', 'iso-8859-1').encode())
    =?iso-8859-1?q?l=E9o?= <leo@example.com>
    >>> print(format_addresses([(u'l\\xe9o', 'leo@example.com') ], 'To', 'us-ascii').encode())
    ... # don't work in 3.X because charset is more than a hint
    ... #doctest: +SKIP
    =?utf-8?q?l=C3=A9o?= <leo@example.com>
    >>> # because u'l\\xe9o' cannot be encoded into us-ascii, utf8 is used instead
    >>> print(format_addresses([('No\\xe9', 'noe@example.com'), (u'M\\u0101ori', 'maori@example.com') ], 'Cc', 'iso-8859-1').encode())
    ... # don't work in 3.X because charset is more than a hint
    ... #doctest: +SKIP
    =?iso-8859-1?q?No=E9?= <noe@example.com> , =?utf-8?b?TcSBb3Jp?= <maori@example.com>
    >>> # 'No\\xe9' is already encoded into iso-8859-1, but u'M\\u0101ori' cannot be encoded into iso-8859-1
    >>> # then utf8 is used here
    >>> print(format_addresses(['a@example.com', ('John', 'john@example.com') ], 'From', 'us-ascii').encode())
    a@example.com , John <john@example.com>
    >>> print(format_addresses(['jo', 'a@example.com'], 'To', 'us-ascii').encode())
    jo , a@example.com
    """
    # bytes is str on Python 2; type(u'') is unicode on Python 2 and str on 3
    text_types = (bytes, type(u''))

    header = email.header.Header(charset=charset, header_name=header_name)
    for position, address in enumerate(addresses):
        if position != 0:
            # add separator between addresses
            header.append(',', charset='us-ascii')

        if isinstance(address, text_types):
            # a string is a bare address; it must not go through the unpacking
            # below, which would split a two-character string into name/addr
            header.append(address, charset='us-ascii')
            continue

        try:
            name, addr = address
        except ValueError:
            # not a (name, address) pair: append it unchanged, as for a bare
            # address
            header.append(address, charset='us-ascii')
            continue

        # check if address name is a unicode or byte string in "pure" us-ascii
        if utils.is_usascii(name):
            # name is plain us-ascii, formataddr can be used
            formatted_addr = email.utils.formataddr((name, addr))
            # us-ascii must be used and not default 'charset'
            header.append(formatted_addr, charset='us-ascii')
        else:
            # this is not a "pure" us-ascii string
            # Header will use "RFC2047" to encode the address name
            # if name is a byte string, charset will be used to decode it first
            header.append(name)
            # here us-ascii must be used and not default 'charset'
            header.append('<%s>' % (addr,), charset='us-ascii')

    return header