def create_message(corpus, data, msg0=None):
    """Build a message object from unicode message text.

    The text is parsed with ``email.Parser.FeedParser`` and then rewritten
    in place:

    * Every header value is passed through ``rmsp``; headers whose result
      is empty are dropped.
    * ``X-Forward-Msg`` headers are removed and their values are handed to
      ``get_numbers``; each resulting item is later passed to
      ``corpus.get_message`` and the fetched message is attached.
    * ``Label`` headers are removed and split on commas to form the
      returned label list (a later ``Label`` header replaces an earlier
      one).
    * All remaining headers are re-encoded with ``encode_header`` using
      the charset named by ``config.MESSAGE_CHARSET``.
    * The body is kept as ASCII when it encodes cleanly; otherwise it is
      encoded with the configured charset's output codec (UTF-8 if that
      charset has no output charset), replacing unencodable characters.

    Args:
        corpus: object providing ``get_message(loc)``, used only when
            forwarded messages are requested.
        data: the message source; must be a ``unicode`` object.
        msg0: optional existing message; when it is multipart, every part
            after the first is added to the result via ``mime_add``.

    Returns:
        A ``(message, labels)`` tuple.

    Raises:
        TypeError: if ``data`` is not a ``unicode`` object.
    """
    from email import Charset, Parser, MIMEMessage

    if not isinstance(data, unicode):
        raise TypeError('data must be a unicode.')

    parser = Parser.FeedParser()
    parser.feed(data)
    msg1 = parser.close()
    csmap = Charset.Charset(config.MESSAGE_CHARSET)

    # Sort the parsed headers into three buckets: forwarding requests,
    # labels, and ordinary headers that are kept (re-encoded).
    forwarded = []
    labels = []
    kept = []
    for (name, value) in msg1.items():
        value = rmsp(value)
        if not value:
            continue
        lowered = name.lower()
        if lowered == 'x-forward-msg':
            forwarded.extend(get_numbers(value))
        elif lowered == 'label':
            labels = value.split(',')
        else:
            kept.append((name.encode('ascii', 'strict'),
                         encode_header(value, csmap)))

    # Wipe every header, then put back only the re-encoded ones.
    for name in msg1.keys():
        del msg1[name]
    for (name, value) in kept:
        msg1[name] = value

    # Re-encode the body.  The charset is set before the payload is
    # replaced, exactly as in the original statement order.
    body = msg1.get_payload(decode=False)
    try:
        # Probe only: the encoded result is discarded on success.
        body.encode('ascii', 'strict')
        msg1.set_charset('ascii')
    except UnicodeError:
        if not csmap.output_charset:
            csmap = Charset.Charset('utf-8')
        msg1.set_charset(str(csmap.output_charset))
        body = body.encode(str(csmap.output_codec), 'replace')
    msg1.set_payload(body)

    # Attach the messages requested for forwarding (no-op when empty).
    for loc in forwarded:
        sub_parser = Parser.FeedParser()
        sub_parser.feed(corpus.get_message(loc))
        msg1 = mime_add(msg1, MIMEMessage.MIMEMessage(sub_parser.close()))

    # Carry over the extra MIME parts of the previous message, skipping
    # its first part.
    if msg0 and msg0.is_multipart():
        for part in msg0.get_payload()[1:]:
            msg1 = mime_add(msg1, part)

    validate_message_structure(msg1)
    return (msg1, labels)
def finish(self): import poplib from email import Parser for i in xrange(self.msgcount): try: data = self.server.retr(i+1)[1] except poplib.error_proto, e: raise MessagePOP3Error(str(e)) data = '\r\n'.join(data) p = Parser.FeedParser() p.feed(data) msg = p.close() if self.ruleset: labels = self.ruleset.apply_msg(msg) else: labels = [] yield (data, labels, get_message_date(msg))
def read_messages(self):
    """Scan ``self.dirname`` and record one entry per message file.

    Every directory entry whose name does not start with ``'.'`` is opened
    in binary mode.  Lines are fed to an ``email.Parser.FeedParser`` up to
    and including the first blank line, so only the leading part of each
    file is read.  For each file a ``(path, labels, date)`` tuple is
    appended to ``self.msgs``, where ``labels`` is the result of
    ``self.ruleset.apply_msg(msg)`` (an empty list when ``self.ruleset``
    is false) and ``date`` is the value returned by
    ``get_message_date(msg)``.

    Each file is closed as soon as it has been read, even if reading
    raises, so file descriptors are not leaked while scanning large
    directories.

    Returns:
        None.
    """
    from email import Parser

    for fname in os.listdir(self.dirname):
        if fname.startswith('.'):
            continue
        fname = os.path.join(self.dirname, fname)
        fp = file(fname, 'rb')
        try:
            p = Parser.FeedParser()
            for line in fp:
                p.feed(line)
                # Stop after the first blank line; the rest of the file
                # is not needed here.
                if not line.strip():
                    break
        finally:
            # try/finally (not ``with``) keeps the block usable on old
            # Python 2 interpreters.
            fp.close()
        msg = p.close()
        if self.ruleset:
            labels = self.ruleset.apply_msg(msg)
        else:
            labels = []
        self.msgs.append((fname, labels, get_message_date(msg)))
    return
def finish(self):
    """Iterate over the messages stored in the mbox file ``self.fname``.

    The file is opened in binary mode and split into messages with
    ``mailbox.PortableUnixMailbox``; each message's raw text is parsed
    with ``email.Parser.FeedParser``.  If the file cannot be opened
    (``IOError``), the generator ends without yielding anything.

    The file is closed when iteration finishes, when an exception
    propagates out of the loop, or when the generator is closed early by
    its consumer.

    Yields:
        ``(data, labels, date)`` tuples, where ``data`` is the raw message
        text, ``labels`` is the result of ``self.ruleset.apply_msg(msg)``
        (an empty list when ``self.ruleset`` is false), and ``date`` is
        the value returned by ``get_message_date(msg)``.
    """
    from mailbox import PortableUnixMailbox
    from email import Parser

    try:
        fp = file(self.fname, 'rb')
    except IOError:
        return
    # NOTE: ``yield`` inside try/finally requires Python 2.5 or later.
    try:
        # The factory returns each message's raw text instead of a parsed
        # object, so the same text can be both parsed and yielded.
        for data in PortableUnixMailbox(fp, lambda msgfp: msgfp.read()):
            p = Parser.FeedParser()
            p.feed(data)
            msg = p.close()
            if self.ruleset:
                labels = self.ruleset.apply_msg(msg)
            else:
                labels = []
            yield (data, labels, get_message_date(msg))
    finally:
        fp.close()
    return