def test_factory_arg_overrides_policy(self):
    # The policy asks for self.MyMessage, but Message is also handed to the
    # parser directly; the assertions require the directly-passed class to win.
    raw_source = "To: foo\n\ntest"
    for parse in self.parsers:
        with self.subTest(parser=parse.__name__):
            custom_policy = default.clone(message_factory=self.MyMessage)
            parsed = parse(raw_source, Message, policy=custom_policy)
            self.assertNotIsInstance(parsed, self.MyMessage)
            self.assertIsInstance(parsed, Message)
def test_factory_arg_overrides_policy(self):
    # The policy asks for self.MyMessage, but Message is also handed to the
    # parser directly; the assertions require the directly-passed class to win.
    raw_source = 'To: foo\n\ntest'
    for parse in self.parsers:
        with self.subTest(parser=parse.__name__):
            custom_policy = default.clone(message_factory=self.MyMessage)
            parsed = parse(raw_source, Message, policy=custom_policy)
            self.assertNotIsInstance(parsed, self.MyMessage)
            self.assertIsInstance(parsed, Message)
def protect_mail(mail,linesep='\r\n',sevenbit=True):
    """convert mail and subparts to ProtectedMessage, convert payloads to 7bit and CRLF

    mail     -- an email Message, or anything `_protected` accepts.
    linesep  -- line separator to apply; when falsy on Python 3 it is taken
                from `get_linesep(mail.as_string())` for Message input.
    sevenbit -- when true, non-ASCII payloads are quoted-printable encoded
                and the Python 3 policy uses cte_type '7bit' (else '8bit').

    Returns the converted message. On Python 3 a Message argument is
    deep-copied before it is modified.

    NOTE(review): the nesting below was reconstructed from a source whose
    whitespace had been flattened -- confirm against the original layout.
    """
    from email.message import Message
    from email.parser import Parser
    from email.encoders import encode_quopri
    import copy, six

    def fix_encoding(msg):
        # Normalise the transfer encoding of a single (non-multipart) part,
        # in place.
        cte = 'Content-Transfer-Encoding'
        if msg[cte] == '8bit' and msg.get_content_maintype()=='text' and msg.get_charset() is None:
            charset = msg.get_content_charset()
            if not charset: return # broken: 8bit text part without a charset parameter
            # Re-set the raw payload and attach the declared charset.
            payload = msg.get_payload(decode=False)
            msg.set_payload(payload)
            msg.set_charset(charset)
        if not sevenbit: return
        # Payloads that are not pure ASCII get quoted-printable encoded;
        # ASCII payloads only gain a '7bit' header if none is present.
        try: msg.get_payload().encode('ascii')
        except UnicodeError: encode_quopri(msg)
        else:
            if not cte in msg: msg.add_header(cte,'7bit')

    if six.PY3:
        from email.policy import default
        cte_type='7bit' if sevenbit else '8bit'
        if not isinstance(mail,Message):
            # Non-Message input is parsed by _protected; adopt the line
            # separator of the policy it chose.
            mail = _protected(mail,linesep=linesep,cte_type=cte_type)
            linesep = mail.policy.linesep
        elif not linesep:
            linesep = get_linesep(mail.as_string())
        policy = default.clone(linesep=linesep,cte_type=cte_type)
        # Work on a copy so the caller's message keeps its own policy.
        mail = copy.deepcopy(mail)
        mail.policy = policy
    else:
        mail = _protected(mail,linesep=linesep)
        linesep = mail._linesep # get new or original lineseps
    if mail.is_multipart():
        converted = []
        for submsg in mail.get_payload():
            if submsg.is_multipart():
                # Nested containers are converted recursively.
                submsg = protect_mail(submsg,linesep,sevenbit)
            else:
                fix_encoding(submsg)
                submsg = _protected(submsg,linesep=linesep)
            converted.append(submsg)
        # Replace the old sub-messages with the converted ones.
        mail.set_payload(None)
        for submsg in converted: mail.attach(submsg)
    else:
        fix_encoding(mail)
        if linesep:
            payload = mail.get_payload()
            # Only string payloads are passed through fix_lines.
            if isinstance(payload,six.string_types):
                mail.set_payload(fix_lines(payload,linesep=linesep))
    return mail
def test_parse_email_with_policy(self):
    # Nothing is checked when no DEFAULT_POLICY is available.
    if DEFAULT_POLICY:
        refold_all = DEFAULT_POLICY.clone(refold_source='all')
        parsed = email.message_from_bytes(
            raw_email_encoded_needs_refolding, policy=refold_all)
        expected = [
            {'email': '*****@*****.**', 'name': 'Receiver'},
            {'email': '*****@*****.**', 'name': 'Second Receiver'},
        ]
        self.assertEqual(expected, get_mail_addresses(parsed, 'to'))
def _protected(txt,linesep=None,headersonly=False,template=None,cte_type='7bit'):
    """convert txt to a protected message without modifying the subparts

    txt         -- a Message, a ProtectedMessage (legacy branch), or raw
                   text/bytes to parse.
    linesep     -- line separator; detected with `get_linesep` when None.
    headersonly -- forwarded to the parser (Python 3) or selects
                   HeaderParser (legacy branch).
    template    -- message class to instantiate; defaults to Message on
                   Python 3 and ProtectedMessage otherwise.
    cte_type    -- policy cte_type, used on the Python 3 branch only.

    NOTE(review): the nesting below was reconstructed from a source whose
    whitespace had been flattened -- confirm against the original layout.
    """
    from email.parser import Parser
    from email.message import Message
    if six.PY3:
        from email.policy import default
        if isinstance(txt,Message):
            if not headersonly:
                # Same (or unspecified) line separator: a plain copy is enough.
                if not linesep or linesep==txt.policy.linesep:
                    import copy
                    return copy.deepcopy(txt)
                return protect_mail(txt,linesep)
            # Headers-only request: serialise and re-parse below.
            # presumably mail_binary returns the raw message data -- verify.
            txt = mail_binary(txt)
        if linesep is None: linesep = get_linesep(txt)
        policy = default.clone(linesep=linesep,cte_type=cte_type)
        if not template: template = Message
        if isinstance(txt,bytes):
            from email.parser import BytesParser
            return BytesParser(policy=policy,_class=template).parsebytes(txt,headersonly)
        return Parser(policy=policy,_class=template).parsestr(txt,headersonly)
    # Legacy (non-PY3) path: messages carry _linesep/_charset attributes.
    if isinstance(txt,ProtectedMessage):
        if not headersonly and (not linesep or linesep==txt._linesep):
            import copy
            return copy.deepcopy(txt)
        charset = txt._charset
        txt = txt.as_string()
    elif isinstance(txt,Message):
        charset = txt.get_content_charset()
        txt = mail_binary(txt)
    else:
        charset = None
    if linesep is None: linesep = get_linesep(txt)
    else: txt = fix_lines(txt,linesep) # convert lineseps
    if not template: template = ProtectedMessage
    from email.parser import HeaderParser
    P = HeaderParser if headersonly else Parser
    mail = P(_class=template).parsestr(txt)
    # Fall back to the charset declared in the parsed headers.
    if not charset: charset = mail.get_content_charset()
    if charset:
        # cannot use mail.set_charset(charset), as it adds MIME-Version header
        mail._charset = charset
    mail._linesep = linesep
    return mail
import sys
import email
import gzip
from email.headerregistry import HeaderRegistry
from email.headerregistry import UnstructuredHeader
from email.policy import default
from email.policy import compat32


class MyHeaderClass(UnstructuredHeader):
    """Unstructured header whose raw value is left-stripped before parsing."""

    @classmethod
    def parse(cls, value, kwds):
        # Drop leading whitespace, then let the stock unstructured parser
        # fill in kwds.
        super().parse(value.lstrip(), kwds)


# Policy.clone() copies attribute references, so a bare default.clone() would
# share its header_factory with email.policy.default and the map_to_type()
# calls below would silently change header parsing for every user of the
# default policy in this process.  Give the clone its own registry instead.
policy = default.clone(header_factory=HeaderRegistry())
policy.header_factory.map_to_type('references', MyHeaderClass)
policy.header_factory.map_to_type('message-id', MyHeaderClass)


def process(f):
    """Parse one message from the binary file object *f* and print its headers."""
    msg = email.message_from_binary_file(f, policy=policy)
    # N.B. This may repeat keys
    for key, val in msg.items():
        print()
        print(key, val)


for path in sys.argv[1:]:
    print(path)
    # Only paths containing ".gz" are opened; any other argument is echoed
    # and otherwise ignored.
    if ".gz" in path:
        with gzip.open(path, 'rb') as gp:
            process(gp)
def test_custom_message_factory_on_policy(self):
    # With no class passed to the parser, the result must be an instance of
    # the message_factory configured on the policy.
    raw_source = 'To: foo\n\ntest'
    for parse in self.parsers:
        with self.subTest(parser=parse.__name__):
            custom_policy = default.clone(message_factory=self.MyMessage)
            parsed = parse(raw_source, policy=custom_policy)
            self.assertIsInstance(parsed, self.MyMessage)
class Remailer:
    """Process the incoming IMAP folder and remail tagged messages over SMTP.

    The class drives an already-connected IMAP connection object and an SMTP
    service object supplied by the caller.  It relies on module-level names
    defined elsewhere in this file (folder names, `debug`/`info`, `Timer`,
    the tag-scanning helpers, `global_from_addr`, ...).

    NOTE(review): the nesting of this class was reconstructed from a source
    whose whitespace had been flattened -- confirm against the original.
    """

    def __init__(self, imap_connection, smtp_service):
        self._imap_cxn = imap_connection
        self._smtp_service = smtp_service

        # Check the connection capabilities to see if it supports
        # the MOVE command
        typ, capabilities_str = self._imap_cxn.capability()
        capabilities = capabilities_str[0].split()
        self._imap_has_move = b'MOVE' in capabilities

        self._uptime_timer = Timer()
        self._imap_timer = Timer()
        self._imap_reconnect_count = 0

    def resetIMAPTimer(self):
        """Restart the timer that tracks the current IMAP connection."""
        self._imap_timer = Timer()

    def _uptimeStr(self):
        return self._uptime_timer.simpleElapsedTimeString()

    def _imapupStr(self):
        return self._imap_timer.simpleElapsedTimeString()

    def setIMAPConnction(self, imap_cxn):
        """Install a new IMAP connection and count the reconnect."""
        self._imap_cxn = imap_cxn
        self._imap_reconnect_count += 1

    def _validateFolder(self, folder_name):
        """Select *folder_name*; raise RuntimeError if the server refuses."""
        typ, [response] = self._imap_cxn.select(folder_name)
        if typ != 'OK':
            raise RuntimeError(response)

    def validateFolderStructure(self):
        """Check that every folder this class uses can be selected."""
        self._validateFolder(incoming_folder)
        self._validateFolder(sent_folder)
        self._validateFolder(exception_folder)
        self._validateFolder(original_folder)
        self._validateFolder(notag_folder)

    def getAllFolderUIDs(self, folder):
        """Select *folder* and return the UIDs of all its messages."""
        typ, [response] = self._imap_cxn.select(folder)
        if typ != 'OK':
            raise RuntimeError(response)

        typ, response = self._imap_cxn.uid('search', None, 'ALL')
        if typ != 'OK':
            raise RuntimeError(response)

        message_uids = response[0].split()
        return message_uids

    def fetchMessageUIDAsBytes(self, message_uid):
        """Fetch the full RFC822 content of *message_uid* as bytes."""
        # Fetch the contents of the message
        typ, data = self._imap_cxn.uid('fetch', message_uid, '(RFC822)')
        if typ != 'OK':
            raise RuntimeError(data)

        # The message data comes in as a bytestring. Resist the temptation
        # to decode it into a UTF-8 string.
        message_bytes = data[0][1]
        return message_bytes

    def checkIMAPResponse(self, code, response):
        """Raise RuntimeError(response) unless *code* is 'OK'."""
        if code != 'OK':
            raise RuntimeError(response)

    def msgId(self, message_uid):
        """Return a printable 'ID(<uid>)' label for a bytes UID."""
        message_id = 'ID(' + message_uid.decode('utf-8') + ')'
        return message_id

    def moveMessageUID(self, message_uid, destination_folder):
        """Move *message_uid* to *destination_folder*.

        Raises RuntimeError if any of the IMAP commands involved fails.
        """
        message_id = self.msgId(message_uid)
        debug('Moving message %s to %s' % (message_id, destination_folder))

        # If our IMAP server supports the MOVE command, then we simply
        # call it directly. If not, we do it the hard way.
        if self._imap_has_move:
            typ, [response] = self._imap_cxn.uid('move', message_uid,
                                                 destination_folder)
            # A failed MOVE must not pass silently: the message would stay
            # in the source folder and be picked up again.
            self.checkIMAPResponse(typ, response)
        else:
            # Here's the hard way: copy the message to the folder...
            typ, [response] = self._imap_cxn.uid('copy', message_uid,
                                                 destination_folder)
            self.checkIMAPResponse(typ, response)

            # ...then delete the original.
            typ, [response] = self._imap_cxn.uid('store', message_uid,
                                                 '+FLAGS', r'(\Deleted)')
            self.checkIMAPResponse(typ, response)

    http_url_regex = 'https://ei194.infusion-links.com/[a-zA-Z0-9/]+'
    http_url_prog = re.compile(http_url_regex)

    def remapURLs(self, message_part_str):
        """Replace each infusionlinks URL with its redirect target."""
        # See if there is an infusionlinks URL
        match = self.http_url_prog.search(message_part_str)
        while match is not None:
            matched_url = match.group(0)
            mapped_url = get_redirect_for(matched_url)
            message_part_str = macro_substitute(message_part_str, match,
                                                mapped_url)

            # Find the next infusionlinks URL
            match = self.http_url_prog.search(message_part_str)

        return message_part_str

    tracking_pixel_url_regex = "https://is-tracking-pixel-api-prod.appspot.com/[a-zA-Z0-9/]+"
    tracking_pixel_url_prog = re.compile(tracking_pixel_url_regex)

    def suppressTrackingPixels(self, message_part_str):
        """Delete every tracking-pixel URL from *message_part_str*."""
        # See if there is a tracking pixel URL
        match = self.tracking_pixel_url_prog.search(message_part_str)
        while match is not None:
            # Delete the URL.
            message_part_str = macro_substitute(message_part_str, match, "")

            # Find the next tracking pixel URL
            match = self.tracking_pixel_url_prog.search(message_part_str)

        return message_part_str

    mime_pattern = "([a-z]+)/([a-z]+)"
    mime_prog = re.compile(mime_pattern)

    def typeAndSubtype(self, mime_type_str):
        """Split 'main/sub' into (main, sub).

        When the string does not start with that pattern, return
        ("", mime_type_str).
        """
        match = self.mime_prog.match(mime_type_str)
        if match is not None:
            return match.group(1), match.group(2)
        return "", mime_type_str

    delete_html_parts = True

    def _partsWithParents(self, obj):
        """Return [(parent, part), ...] for *obj* and all its sub-parts.

        The order matches obj.walk() (depth first, document order); the
        parent of *obj* itself is None.  The list is a snapshot, so parts
        may be detached from their parent while the caller iterates it.
        """
        pairs = []
        pending = [(None, obj)]
        while pending:
            parent, part = pending.pop()
            pairs.append((parent, part))
            if part.is_multipart():
                # Reversed so that pop() yields children in document order.
                pending.extend(
                    (part, child) for child in reversed(part.get_payload()))
        return pairs

    def performSubstitutionOnMessageParts(self, obj):
        """Rewrite the parts of message *obj* in place.

        Returns the set of remail-to addresses found in the parts.
        """
        remail_addresses_set = set()

        # Loop over all the message parts.  We iterate a snapshot that also
        # records each part's container, so deleting a part neither skips
        # its next sibling nor fails for parts nested below the top level.
        for parent, part in self._partsWithParents(obj):
            content_type = part.get_content_type()
            content_charset = part.get_content_charset()
            content_disposition = part.get_content_disposition()

            # Spit out some diagnostic messages.
            debug("Content-Type: %s" % content_type)
            debug("Content-Charset: %s" % content_charset)
            debug("Content-Disposition: %s" % content_disposition)

            # Multipart parts are basically containers, so we don't process
            # them. But we process all others.
            if part.is_multipart():
                continue

            main_type, sub_type = self.typeAndSubtype(content_type)

            # Optional configuration: just delete all HTML parts because
            # something in them is causing emails to get filed as SPAM.
            if self.delete_html_parts and sub_type == "html":
                if parent is not None:
                    parent.get_payload().remove(part)
                    # This part has been deleted - no further processing
                    # needed for this part.
                    continue
                # The message itself is the HTML part; there is no container
                # to remove it from, so it is processed like any other part.
                debug("HTML part is the whole message - not deleting it.")

            debug("main_type = <%s>, sub_type = <%s>" % (main_type, sub_type))

            # Get the message_part_str of this part of the message.
            # If this part of the message was encoded in (possibly)
            # MIME quoted-printable, it will be decoded into a string
            # in (probably) UTF-8 unicode. (Which is good, because
            # it's easier to deal with in this form.)
            message_part_str = part.get_content()

            # Now some real processing...
            maybe_modified_content_str = scanPartForTruncateTags(
                message_part_str)

            # Scan the part for remail-to: tags, replace them,
            # and accumulate the recipient addresses.
            maybe_modified_content_str, more_remail_addresses_set = \
                scanPartForRemailTags(maybe_modified_content_str)

            # Get the union of the two sets.
            remail_addresses_set |= more_remail_addresses_set

            # # Now perform mapping of any infusion-link URLs to their
            # # direct link counterparts.
            # maybe_modified_content_str = self.remapURLs(maybe_modified_content_str)
            #
            # # Finally, nuke any tracking pixel URLs
            # maybe_modified_content_str = self.suppressTrackingPixels(maybe_modified_content_str)

            # If any of these steps have modified the content of this
            # part of the message, then replace that part of the
            # message object.
            if maybe_modified_content_str != message_part_str:
                part.set_content(maybe_modified_content_str,
                                 subtype=sub_type,
                                 charset=content_charset,
                                 disposition=content_disposition)

        # Return any remail-to addresses we found. (It's not necessary
        # to return the message object. It's passed by reference,
        # and the caller's reference will retain any changes
        # we've made here.)
        return remail_addresses_set

    from email.policy import default
    MHTMLPolicy = default.clone(linesep='\r\n', max_line_length=0)

    def doThemAll(self):
        """Process every message currently in the incoming folder.

        A failure while handling one message is printed and that message is
        skipped; the remaining messages are still processed.
        """
        first_send_this_iteration = True

        # Get the UIDs of all the messages in our Inbox and compute
        # the number of messages, which we key off of for some
        # info messages and housekeeping.
        message_uids = self.getAllFolderUIDs(incoming_folder)
        message_count = len(message_uids)

        # Report the number of messages in the Inbox.
        mc_suffix = "" if message_count == 1 else "s"
        info(
            "%d message%s in %s, Uptime: %s, IMAP uptime: %s, reconnect count: %d"
            % (message_count, mc_suffix, incoming_folder, self._uptimeStr(),
               self._imapupStr(), self._imap_reconnect_count))

        if message_count > 0:
            print(
                '################################################################################'
            )

        # Loop through all the messages in the inbox.
        for message_uid in message_uids:
            # Wrap this processing in a try block so
            # that if a message fails we may still be
            # able to process others.
            try:
                message_bytes = self.fetchMessageUIDAsBytes(message_uid)

                # Emit some messages to show progress.
                print()
                info("Message %s" % self.msgId(message_uid))
                showMessageSubject(message_bytes)

                message_obj = messageBytesAsObject(message_bytes)
                remail_addresses_set = self.performSubstitutionOnMessageParts(
                    message_obj)

                remail_count = len(remail_addresses_set)
                if remail_count > 0:
                    rm_suffix = "" if remail_count == 1 else "es"
                    info("Found %d remail address%s" %
                         (remail_count, rm_suffix))

                    # We found at least one valid remail-to tag, so the original
                    # message should be moved to the originals folder.
                    self.moveMessageUID(message_uid, original_folder)

                    # The message object has already had its body modified
                    # by performSubstitutionOnMessageParts.  Now we modify
                    # the headers to make the message look like a brand new
                    # message, not something that's been bounced around the
                    # Internet already.
                    mutateHeaders(message_obj, global_from_addr)

                    # Construct a single To: header with all of the email
                    # addresses in it.
                    to_header_str = ', '.join(remail_addresses_set)
                    message_obj.add_header("To", to_header_str)

                    # Save the base message to IMAP so it can easily be resent
                    # later.
                    now = Time2Internaldate(time.time())
                    message_bytes = self._smtp_service.message_bytes(
                        message_obj)
                    typ, data = self._imap_cxn.append(sent_folder, '', now,
                                                      message_bytes)

                    # message_obj now contains the base message, which we
                    # send to each of the recipients in turn.

                    # We're about to send an email. If it's the first email
                    # for this iteration, then we need to get the SMTP server
                    # ready.
                    if first_send_this_iteration:
                        debug("Readying SMTP service.")
                        self._smtp_service.readyService()
                        # Only ready the service once per call.
                        first_send_this_iteration = False

                    # Send the email to each of its recipients.
                    for recipient_address in remail_addresses_set:
                        info("Sending to <%s>" % recipient_address)
                        self._smtp_service.send_message(
                            global_from_addr, recipient_address, message_obj)
                else:
                    # No addresses to remail to - move the original message to the
                    # original-notag folder
                    debug("No remail addresses! Moving to no-tag folder.")
                    self.moveMessageUID(message_uid, notag_folder)
            except Exception as e:
                traceback.print_tb(e.__traceback__)
                print("*** Error processing message - skipping.")

        # If we had some messages to process, then do some cleanup...
        if message_count > 0:
            # Close the connection to the SMTP server. SMTP servers don't
            # like it when connections to them remain open too long, so
            # we close the connection. This call is harmless if the connection
            # was never opened.
            debug("Terminating SMTP service.")
            self._smtp_service.terminateService()
            # NOTE(review): assumed to belong to the cleanup block, pairing
            # with the banner printed above -- confirm.
            info('*** Done ***')

    def testIMAPConnection(self):
        """Send a NOOP over the IMAP connection."""
        self._imap_cxn.noop()
def test_custom_message_factory_on_policy(self):
    # With no class passed to the parser, the result must be an instance of
    # the message_factory configured on the policy.
    raw_source = "To: foo\n\ntest"
    for parse in self.parsers:
        with self.subTest(parser=parse.__name__):
            custom_policy = default.clone(message_factory=self.MyMessage)
            parsed = parse(raw_source, policy=custom_policy)
            self.assertIsInstance(parsed, self.MyMessage)
def create_mime(data,maintype='text',subtype=None,charset=None,
                params=None, headers=None, encoding=None):
    """
    create MIME message from data.

    if maintype is None, the default content type is text/plain for str,
    and application/octet-stream for binary data.
    The default charset and encoding are automatically detected.

    params   -- extra keyword arguments for the non-text MIME constructors
                (None means none).
    headers  -- mapping of additional headers, applied with `_set_header`
                (None means none).
    encoding -- Content-Transfer-Encoding to force: 'base64',
                'quoted-printable', '7bit', '8bit', or None for the default.

    Raises UnicodeEncodeError when text with a charset other than us-ascii
    is requested as '7bit', and UnicodeError when non-text data requested
    as '7bit' is not pure ASCII.

    NOTE(review): the nesting below was reconstructed from a source whose
    whitespace had been flattened -- confirm against the original layout.
    """
    from email.mime.base import MIMEBase
    from email.mime.application import MIMEApplication
    from email.mime.audio import MIMEAudio
    from email.mime.image import MIMEImage
    from email.mime.message import MIMEMessage
    from email.mime.text import MIMEText
    import email.encoders, six

    # None instead of shared mutable {} defaults.
    if params is None: params = {}
    if headers is None: headers = {}

    if maintype is None:
        if isinstance(data,(six.text_type,)+six.string_types):
            maintype= 'text'
        elif isinstance(data,six.binary_type):
            maintype,subtype = 'application', subtype or 'octet-stream'
    else:
        maintype = maintype.lower()
    cte = 'Content-Transfer-Encoding'
    if maintype=='text':
        subtype = subtype or 'plain'
        data, charset = check_charset(data,charset)
        if encoding is None:
            msg = MIMEText(data, subtype, charset)
        else:
            # Payload is filled in below once the encoding is applied.
            msg = MIMEText('', subtype, charset)
        if encoding in ('base64','quoted-printable'):
            del msg[cte]
            if not isinstance(data,six.binary_type): data = data.encode(charset)
            msg.set_payload(data)
            if encoding=='base64': email.encoders.encode_base64(msg)
            else: email.encoders.encode_quopri(msg)
        elif encoding:
            if six.PY3:
                from email.policy import default
                policy = default.clone(cte_type='8bit' if encoding=='8bit' else '7bit')
                msg.policy = policy
            msg.replace_header(cte,encoding)
            if encoding=='7bit' and charset!='us-ascii':
                # UnicodeEncodeError needs all five constructor arguments;
                # raising the bare class fails with TypeError instead.
                if isinstance(data,six.text_type): text = data
                else: text = data.decode('latin-1')
                raise UnicodeEncodeError('us-ascii', text, 0, len(text),
                    'charset %s cannot be transferred as 7bit' % charset)
            if six.PY3 and isinstance(data,six.binary_type):
                data = data.decode(charset)
            msg.set_payload(data)
        if msg[cte]=='base64': # remove superfluous newline
            data = msg.get_payload(decode=False)
            msg.set_payload(data.rstrip())
    else:
        # Without an explicit encoding the constructor gets an empty
        # payload; the data is set and base64-encoded below.
        idata = data if encoding else ''
        if maintype=='application': msg = MIMEApplication(idata, subtype, **params)
        elif maintype=='audio': msg = MIMEAudio(idata, subtype, **params)
        elif maintype=='image': msg = MIMEImage(idata, subtype, **params)
        elif maintype=='message': msg = MIMEMessage(idata, subtype)
        else: msg = MIMEBase(maintype, subtype, **params)
        encoding = encoding or 'base64'
        if encoding in ('base64','quoted-printable'):
            del msg[cte]
            msg.set_payload(data)
            if encoding=='base64': email.encoders.encode_base64(msg)
            else: email.encoders.encode_quopri(msg)
        elif encoding:
            if six.PY3:
                from email.policy import default
                policy = default.clone(cte_type='7bit' if encoding=='7bit' else '8bit')
                msg.policy = policy
            msg.replace_header(cte,encoding)
            if encoding=='7bit':
                # Called only for validation: raises if data is not ASCII.
                if isinstance(data,six.string_types): data.encode('us-ascii')
                else: data.decode('us-ascii')
            if six.PY3 and isinstance(data,six.binary_type):
                data = data.decode('utf-8','surrogateescape')
            msg.set_payload(data)
        if msg[cte]=='base64': # remove superfluous newline
            data = msg.get_payload(decode=False)
            msg.set_payload(data.rstrip())
    for k, v in six.iteritems(headers):
        _set_header(msg,k,v)
    return msg