def unicode_hdr(hdr, fallback=None):
    """Turn a raw e-mail header value into one text string.

    The header is split with ``decode_header``; every chunk that carries a
    charset is passed to ``fuzzydecode`` with that charset first and the
    optional ``fallback`` encoding second. Chunks without a charset are
    used as they are. The resulting pieces are concatenated.

    If splitting or decoding fails, the whole original header is handed to
    ``fuzzydecode`` with ``fallback`` (when given) followed by ``utf-8``.

    :param hdr: the header value to decode.
    :param fallback: optional extra encoding name to try; ignored if falsy.
    :returns: the decoded header.
    """
    more_encs = []
    if fallback:
        more_encs.append(fallback)

    try:
        pieces = []
        for chunk, charset in decode_header(hdr):
            if charset:
                chunk = fuzzydecode(chunk, [charset] + more_encs)
            pieces.append(chunk)
        return u"".join(pieces)
    except UnicodeErrors:
        # This happens when a unicode string containing extended ascii
        # characters is passed in, because the stdlib's decode_header
        # function calls str() on its argument.
        return fuzzydecode(hdr, more_encs + ["utf-8"])
    except Exception:
        # An example header that raises ValueError (because of an int() call
        # in email.header.decode_headers):
        # 'You=?UTF8?Q?=E2=80=99?=re HOT, [email protected] =?UTF8?Q?=E2=80=93?= See if Someone Searched for You'
        log.warning("decoding an email header failed: %r", hdr)
        return fuzzydecode(hdr, more_encs + ["utf-8"])
def utf16_encode(str):
    """Convert ``str`` to or from UTF-16, depending on its type.

    * If ``unicode(str, 'utf-16')`` succeeds, its result (the input decoded
      from UTF-16) is returned.
    * If that call raises ``TypeError`` and the input is already a
      ``unicode`` object, the input encoded as UTF-16 is returned.
    * For any other input that raises ``TypeError``, the value is passed to
      ``fuzzydecode`` with ``'utf-8'`` and the result is encoded as UTF-16.

    Errors other than ``TypeError`` from the first attempt (for example a
    decode error on malformed data) are not caught here.

    :param str: the value to convert. The name shadows the builtin; it is
        kept so existing keyword callers continue to work.
    """
    try:
        return unicode(str, 'utf-16')
    except TypeError:
        if isinstance(str, unicode):
            return str.encode('utf-16')
        # Previously this referenced an undefined name ``s`` and raised
        # NameError; the function's own argument is what must be decoded.
        return fuzzydecode(str, 'utf-8').encode('utf-16')
def unicode_hdr(hdr, fallback=None):
    '''Decode an e-mail header value into a single text string.

    ``decode_header`` breaks the header into ``(value, encoding)`` pairs.
    Pairs with an encoding go through ``fuzzydecode`` using that encoding
    plus the optional ``fallback``; pairs without one are kept unchanged.
    All parts are then joined together.

    On any failure the complete, original header is given to
    ``fuzzydecode`` with ``fallback`` (if truthy) and then ``utf-8`` as the
    encodings to try. Non-unicode failures are also logged as a warning.

    :param hdr: header value to decode.
    :param fallback: optional additional encoding name.
    :returns: the decoded header text.
    '''
    more_encs = [fallback] if fallback else []

    def _decode_part(value, encoding):
        # Parts without a declared encoding are passed through untouched.
        if not encoding:
            return value
        return fuzzydecode(value, [encoding] + more_encs)

    try:
        parts = [_decode_part(value, encoding)
                 for (value, encoding) in decode_header(hdr)]
        return u''.join(parts)
    except UnicodeErrors:
        # Reached when a unicode string with extended ascii characters gets
        # into this function, because the stdlib's decode_header function
        # calls str() on its argument.
        return fuzzydecode(hdr, more_encs + ['utf-8'])
    except Exception:
        # An example header that raises ValueError (because of an int() call
        # in email.header.decode_headers):
        # 'You=?UTF8?Q?=E2=80=99?=re HOT, [email protected] =?UTF8?Q?=E2=80=93?= See if Someone Searched for You'
        log.warning('decoding an email header failed: %r', hdr)
        return fuzzydecode(hdr, more_encs + ['utf-8'])
def __init__(self, acct, m_tag):
    '''
    Build an offline-message record from one <M> notification element.

    Reference: http://msnpiki.msnfanatic.com/index.php/MSNP13:Offline_IM

    Fields of the <M> element, as described there:

     * T:  Unknown, but has so far only been set to 11.
     * S:  Unknown, but has so far only been set to 6.
     * RT: The date/time stamp for when the message was received by the
           server. This stamp can be used to sort the message in the proper
           order, although you are recommended to use a different method
           instead which will be explained later.
     * RS: Unknown, but most likely is set to 1 if the message has been
           read before ("Read Set").
     * SZ: The size of the message, including headers.
     * E:  The e-mail address of the sender.
     * I:  This is the ID of the message, which should be used later on to
           retrieve the message. Note that the ID is a GUID in the form
           XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX. It was previously (the
           change was first noticed in March 2007) in the format of
           "MSGunix-timestamp.millseconds" (for example MSG1132093467.11)
           and the Message ID format could change again anytime.
     * F:  Unknown, but has so far only been observed as either a GUID with
           a single 9 at the end, or as ".!!OIM" (in case you are already
           online when receiving the notification).
     * N:  This field contains the friendlyname of the person, wrapped in a
           special encoding. This encoding is defined in RFC 2047, but to
           get you started there is a quick overview of the format below
           (see #Field_encoding). You are recommended however to implement
           a fully able e-mail parser to handle OIMs!
             o Note! When this field is found in a non-initial notification
               it will contain a space in the data field. You must filter
               this space (trim the string) in order to correctly decode
               this field!
     * SU: Unknown, has only been observed to contain one space.

    Example:
      <M>
        <T>11</T>
        <S>6</S>
        <RT>2007-05-14T15:52:53.377Z</RT>
        <RS>0</RS>
        <SZ>950</SZ>
        <E>[email protected]</E>
        <I>08CBD8BE-9972-433C-A9DA-84A0A725ABFA</I>
        <F>00000000-0000-0000-0000-000000000009</F>
        <N>=?utf-8?B?QWFyb24=?=</N>
      </M>

    Only SZ, E, N, RT and I are read here. A timestamp that cannot be
    parsed leaves ``self.time`` as None instead of failing construction.
    '''
    self.acct = acct

    # Values taken directly from the notification element.
    self.size = int(str(m_tag.SZ))
    self.email = str(m_tag.E)

    # The friendly name is header-encoded; strip the stray whitespace
    # before splitting it, then decode each part with its own charset.
    encoded_name = m_tag.N.text.strip()
    self.name = u''.join(
        fuzzydecode(part, charset)
        for part, charset in decode_header(encoded_name)
    )

    try:
        received_at = self.parse_time(str(m_tag.RT))
    except Exception:
        received_at = None
    self.time = received_at

    self.id = UUID(str(m_tag.I))

    # Initial bookkeeping state for a freshly created message.
    self.msg = ''
    self.deleted = False
    self._had_error = False
    self.received = False
    self.runid = None
    self.seqnum = 0

    log.info_s('%r created', self)