def read(self, source_path):
    """Parse a redirect file and return ``(content, metadata)``.

    The file is parsed as an RFC 822 style message: the headers supply the
    metadata and the stripped payload is the content.

    Raises:
        ValueError: if the ``location`` header is missing or empty, or if
            the ``delay`` header cannot be converted to ``float``.
    """
    with pelican_open(source_path) as source:
        message = email.parser.Parser().parsestr(source)

    location = message.get('location')
    if not location:
        raise ValueError(
            u"RedirectReader requires a 'location' header in the file")

    metadata = {
        'title': message.get('title', u''),
        'location': location,
        'delay': float(message.get('delay', 0)),
        'status': message.get('status', 'hidden'),
    }

    # The slug matters because Pelican's slugification affects the final
    # URL, and exact URLs are the whole point of a redirect. An explicit
    # ``slug`` header wins; otherwise the file name (without extension) is
    # trusted as-is.
    slug = message.get('slug') or os.path.splitext(
        os.path.basename(source_path))[0]
    if slug:
        metadata['slug'] = slug

    return message.get_payload().strip(), metadata
def extract_messages(path, addressbook, outboxes, verbose):
    """Extract e-mails from the Enron corpus rooted at ``path``.

    Only messages whose ``From`` value is in ``addressbook`` and that have
    at least one To/Cc/Bcc recipient in ``addressbook`` are kept. When
    ``outboxes`` is true, only folders whose path ends in ``sent_items``,
    ``sent`` or ``sent_mail`` are read. When ``verbose`` is true each
    visited folder is printed.

    Returns the unique messages sorted by their ``timestamp`` attribute.
    """
    sent_folder_patterns = [
        re.compile(r) for r in ['sent_items$', 'sent$', 'sent_mail$']]
    parser = email.parser.Parser()
    # A set guarantees that duplicated messages are stored once.
    unique = set()

    for root, _, files in os.walk(path):
        # Only parse messages in 'sent' folders when requested.
        if outboxes and not any(p.search(root) for p in sent_folder_patterns):
            continue
        if verbose:
            print(root)

        for name in files:
            file_path = os.path.join(root, name)
            with codecs.open(file_path, 'r', 'Latin-1') as handle:
                message = parser.parsestr(handle.read())

            sender = message['From']
            if sender not in addressbook:
                continue

            # Collect the whitespace-separated tokens of every recipient
            # header, dropping the separating commas.
            recipients = []
            for field in ('To', 'Cc', 'Bcc'):
                value = message[field]
                if value is not None:
                    recipients.extend(
                        token.strip(',') for token in value.split())

            # Only keep recipients that are in the address book.
            known = tuple(
                addressbook[r] for r in recipients if r in addressbook)
            if not known:
                continue

            unique.add(Message(
                sender, known, dateutil.parser.parse(message['Date'])))

    return sorted(unique, key=lambda m: m.timestamp)
def get_parts(response):
    """Split a multipart HTTP response into its MIME parts.

    The response headers and body are glued together and parsed as one
    MIME message.

    Params:
        response: a response object exposing ``raw.getheaders()`` (a
            mapping of header names to values) and ``content``.

    Returns:
        a dict mapping each part's Content-ID (without angle brackets) to
        its payload. The root part is stored under the key ``'start'``: it
        is the part named by the ``start`` parameter of the Content-Type
        header, or the first part when that parameter is absent. Parts
        without a Content-ID are stored as ``'Attachment<i>'``.
    """
    # ``items()`` works for both Python 2 and Python 3 mappings.
    head_lines = ''.join(
        str(k) + ':' + str(v) + '\n'
        for k, v in response.raw.getheaders().items())
    full = head_lines + response.content

    decoded_reply = email.parser.Parser().parsestr(full)

    # ``get_param`` returns None when there is no ``start`` parameter; the
    # loop below already treats an empty start as "use the first part".
    start = (decoded_reply.get_param('start') or '').lstrip('<').rstrip('>')

    parts = {}
    i = 0
    for part in decoded_reply.get_payload():
        cid = part.get('content-Id', '').lstrip('<').rstrip('>')
        if (not start or start == cid) and 'start' not in parts:
            parts['start'] = part.get_payload()
        else:
            parts[cid or 'Attachment%d' % i] = part.get_payload()
        i += 1
    return parts
def __init__(self, config, body, rcpts, parse_rcpts=False, efrom=None):
    """Create a new message to the given recipients containing the text
    given in `body`.

    If no envelope sender address is passed in `efrom`, it is guessed
    from the message body. If `parse_rcpts` is true, the message body
    is parsed for additional recipients."""
    # Keep only the bare address of every recipient that parses.
    self.rcpts = set()
    for rcpt in rcpts:
        _, addr = email.utils.parseaddr(rcpt)
        if addr:
            self.rcpts.add(addr)

    parser = email.parser.Parser()
    # Second argument is ``headersonly``: the body is kept as one string.
    self.message = parser.parsestr(self.received() + body, True)

    # ``Message.has_key`` no longer exists on Python 3; the ``in``
    # operator performs the same case-insensitive header lookup on both
    # Python 2 and Python 3.
    if efrom is None:
        if "from" not in self.message:
            default_from = config.get_general(str, "default_from")
            if default_from:
                self.message["From"] = default_from
        self.efrom = self.guess_envelope_from()
    else:
        self.efrom = efrom
        if "from" not in self.message:
            self.message["From"] = self.efrom

    if parse_rcpts:
        self.add_recipient_addresses()
    self.fix_headers()
def fetchmailbody(mail_detail, result):
    """Fetch one body section of a message and fill ``result`` in place.

    ``mail_detail`` supplies ``uid`` and ``section_id``. On return
    ``result['error']`` is 0 and ``result['data']`` holds ``body``,
    ``is_html`` and, taken from the RFC822 header, ``from``, ``to``,
    ``time``, ``subject`` plus ``cc``/``bcc`` when those headers are set.

    Uses the module-level ``server`` object for the fetch.
    NOTE(review): relies on Python 2 names (``unicode``, ``iteritems``).
    """
    result['error'] = 0
    result['data'] = {};
    # server.debug = 5;
    message = []
    message.append(mail_detail[u'uid'])
    # BODY.PEEK is what is requested; the response is read back under the
    # plain BODY[...] key (see body_field below).
    body_section = 'BODY.PEEK[' + mail_detail['section_id'] + ']'
    body_field = 'BODY[' + mail_detail['section_id'] + ']'
    body_mime = 'BODY[' + mail_detail['section_id'] + '.MIME]'
    response = server.fetch(message, [body_section, 'RFC822.HEADER', body_mime])
    result['data']['body'] = ''
    result['data']['is_html'] = 0
    for msgid, data in response.iteritems():
        # Re-assemble the section as MIME headers + body so the email
        # package can decode it.
        email_eml = data[str(body_mime)].encode('utf-8', 'replace') + data[str(body_field)].encode('utf-8', 'replace')
        mail = email.message_from_string(email_eml)
        for part in mail.walk():
            # Fall back to charset detection when the part declares none.
            if part.get_content_charset() is None:
                charset = chardet.detect(str(part))['encoding']
            else:
                charset = part.get_content_charset()
            # Each part overwrites the previous body, so the last walked
            # part is the one that is kept.
            result['data']['body'] = unicode(part.get_payload(decode=True),str(charset),"ignore").encode('utf8','replace')
            if part.get_content_type() == 'text/html':
                result['data']['is_html'] = 1
            if part.get_content_type() == 'text/plain':
                result['data']['is_html'] = 0
        parser = email.parser.HeaderParser()
        headers = parser.parsestr(data[u'RFC822.HEADER'].encode('utf-8', 'replace'))
        # NOTE(review): From, To, Subject and Date are assumed present; a
        # missing header yields None and the calls below would raise.
        result['data']['from'] = headers['FROM'].encode('utf-8', 'replace')
        result['data']['to'] = headers['TO'].encode('utf-8', 'replace')
        if (headers['CC']):
            result['data']['cc'] = headers['CC'].encode('utf-8', 'replace')
        if (headers['BCC']):
            result['data']['bcc'] = headers['BCC'].encode('utf-8', 'replace')
        result['data']['time'] = int(time.mktime(email.utils.parsedate(headers['DATE'])))
        result['data']['subject'] = headers['SUBJECT'].encode('utf-8', 'replace')
async def parse_headers(cls, reader):
    '''
    Read HTTP header lines from `reader` and parse them.

    This is a port of the HTTP header parsing code from the Python standard
    library, modified to use asyncio.

    https://github.com/python/cpython/blob/3.6/Lib/http/client.py

    Lines are read until a blank line (or end of input) is seen; the
    collected bytes are decoded as ISO-8859-1 and handed to the email
    parser.

    :return: an `http.client.HTTPMessage` holding the headers.
    :raises: ValueError if a line longer than MAXLINE characters is
        discovered.
    :raises: ValueError if more than MAXHEADERS headers are discovered.
    '''
    terminators = (b'\r\n', b'\n', b'')
    collected = []
    line = None
    while line not in terminators:
        line = await reader.readline()
        if len(line) > cls.MAXLINE:
            raise ValueError('Line too long while parsing header')
        collected.append(line)
        if len(collected) > cls.MAXHEADERS:
            raise ValueError('Too many headers found while parsing')

    text = b''.join(collected).decode('iso-8859-1')
    message_parser = email.parser.Parser(_class=http.client.HTTPMessage)
    return message_parser.parsestr(text)
def extract(mailString):
    """Report the mail client of ``mailString`` and process its body.

    Prints the ``X-Mailer`` header (or ``NOT_FOUND``), then passes the
    payload to ``extractUtility`` together with the parsed headers. For a
    multipart message only the first part is processed.
    """
    # Headers only: used for the X-Mailer lookup and passed on below.
    headers = email.parser.HeaderParser().parsestr(mailString)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    mailer = headers['X-Mailer']
    if mailer is None:
        print("CLIENT USED : NOT_FOUND")
    else:
        print("CLIENT USED : " + str(mailer))

    message = email.message_from_string(mailString)

    if message.is_multipart():
        # Only the first part of a multipart message is considered.
        parts = message.get_payload()
        if parts:
            extractUtility(parts[0], headers, True)
    else:
        extractUtility(message.get_payload(), headers, False)
def _collect_headers(strings):
    """Parse every string as a message and merge the headers into one dict.

    Headers from later strings override same-named headers from earlier
    ones.
    """
    parser = email.parser.Parser()
    merged = {}
    for text in strings:
        parsed = parser.parsestr(text)
        merged.update(dict(parsed))
    return merged
def writeMessage(string,yearlims = [1970,2020]):
    """Parse one message and append it to the catalog and input files.

    The headers become a metadata dict that is written as one JSON line to
    the global ``catalog``; the payload, with newlines and tabs replaced by
    spaces, is written to the global ``input`` prefixed by the new id.
    The global counter ``id`` is incremented on every call.
    """
    global id
    global parser
    global catalog
    global input
    parsed = parser.parsestr(string)
    metadata = dict(parsed)
    # Clean the metadata and make some elements into arrays.
    # Each step is best-effort: any failure (e.g. a missing header) is
    # silently ignored.
    try:
        metadata["Path"] = metadata["Path"].split("!")
    except:
        pass
    try:
        metadata["Newsgroups"] = metadata["Newsgroups"].split(",")
    except:
        pass
    if "From" in metadata:
        # NOTE: this local name shadows any module named ``email`` inside
        # this function.
        email = emailName(metadata["From"])
        emailFields = email.elements()
        for key in emailFields.keys():
            metadata[key] = emailFields[key]
    try:
        metadata["date"] = dateutil.parser.parse(metadata["Date"]).isoformat()
        # NOTE(review): ``year`` is computed and range-checked against
        # ``yearlims`` but never used afterwards.
        year = metadata["date"][:4]
        if int(year) < yearlims[0] or int(year) > yearlims[1]:
            year = ""
    except:
        pass
    id += 1
    metadata["filename"] = str(id)
    catalog.write(json.dumps(metadata) + "\n")
    input.write(str(id) + "\t" + parsed.get_payload().replace("\n"," ").replace("\t"," ") + "\n")
def _process_mail(self, mailbox, uid, flags, idate, msg):
    """Process the attachments (if any) on an individual mail.

    Mails without attachments are skipped. Otherwise the mail is saved to
    the database when one is configured, and its attachments are removed
    when ``self.remove`` is set.
    """
    mail = email.parser.Parser().parsestr(msg)

    if 'message-id' not in mail:
        # No Message-ID: derive a stand-in from a hash of the headers.
        mail['message-id'] = "*****@*****.**" % hashlib.sha1(repr(mail._headers)).hexdigest()
        logging.warning(" mail %s: no Message-ID, using fake-id %s", uid, mail['message-id'])
    logging.debug("Message-ID: %s", mail['message-id'])

    # Quick first pass: stop at the first part that is an attachment.
    has_attachment = any(
        self._part_is_attachment(part) for part in mail.walk())
    if not has_attachment:
        logging.debug("No attachments --> skip (%d bytes)" % len(str(mail)))
        return

    doc_id = None
    if self.db is not None:
        doc_id = self._save_mail_to_db(mailbox, mail)

    if self.remove:
        self._remove_attachments(mail, doc_id, mailbox, uid, flags, idate)
def get_parts(response):
    """Split a multipart HTTP response into its MIME parts.

    The response headers and the stringified body are glued together and
    parsed as one MIME message.

    Params:
        response: a response object exposing ``raw.getheaders()`` (a
            mapping of header names to values) and ``content``.

    Returns:
        a dict mapping each part's Content-ID (without angle brackets) to
        its payload. The root part is stored under the key ``'start'``: it
        is the part named by the ``start`` parameter of the Content-Type
        header, or the first part when that parameter is absent. Parts
        without a Content-ID are stored as ``'Attachment<i>'``.
    """
    # ``items()`` works for both Python 2 and Python 3 mappings.
    head_lines = ''.join(
        str(k) + ':' + str(v) + '\n'
        for k, v in response.raw.getheaders().items())
    content = str(response.content)
    full = head_lines + content

    decoded_reply = email.parser.Parser().parsestr(full)

    # ``get_param`` returns None when there is no ``start`` parameter; the
    # loop below already treats an empty start as "use the first part".
    start = (decoded_reply.get_param('start') or '').lstrip('<').rstrip('>')

    parts = {}
    i = 0
    for part in decoded_reply.get_payload():
        cid = part.get('content-Id', '').lstrip('<').rstrip('>')
        if (not start or start == cid) and 'start' not in parts:
            parts['start'] = part.get_payload()
        else:
            parts[cid or 'Attachment%d' % i] = part.get_payload()
        i += 1
    return parts
def _parse_message(self, file_path, data, headersonly=False, clean=True):
    """Decode a mail-file message into its metadata and contents.

    The JSON metadata lives in the ``X-Mailfile`` header, either encrypted
    (value prefixed with ``!``) or base64-encoded. With ``headersonly``
    only the metadata is returned; otherwise the result is
    ``(metadata, contents)`` where contents is the decoded
    ``application/x-mailfile`` part truncated to ``metadata['bytes']``.

    Raises:
        IOError: if ``file_path`` is given and differs from
            ``metadata['fn']``.
        OSError: if no ``application/x-mailfile`` part is found.
    """
    parser_cls = (
        email.parser.HeaderParser if headersonly else email.parser.Parser)
    message = parser_cls().parsestr(data, headersonly=headersonly)

    encoded_meta = message['X-Mailfile'].strip()
    if encoded_meta[:1] == '!':
        raw_meta = self.config.fernet.decrypt(encoded_meta[1:])
    else:
        raw_meta = base64.b64decode(encoded_meta)
    metadata = json.loads(raw_meta)

    if file_path and metadata['fn'] != file_path:
        raise IOError('File path mismatch: %s' % metadata['fn'])
    if clean:
        _clean_metadata(metadata)
    if headersonly:
        return metadata

    for part in message.walk():
        if part.get_content_type() != 'application/x-mailfile':
            continue
        blob = part.get_payload()
        if blob[:1] == '!':
            blob = self.config.fernet.decrypt(blob[1:])
        else:
            blob = base64.b64decode(blob)
        return metadata, blob[:metadata['bytes']]

    raise OSError('No data in message, %s is corrupt?' % (file_path or 'file'))
def __init__(self, config, body, rcpts, parse_rcpts = False, efrom = None):
    """Create a new message to the given recipients containing the text
    given in `body`.

    If no envelope sender address is passed in `efrom`, it is guessed
    from the message body. If `parse_rcpts` is true, the message body
    is parsed for additional recipients."""
    # Keep only the bare address of every recipient that parses.
    self.rcpts = set()
    for rcpt in rcpts:
        _, addr = email.utils.parseaddr(rcpt)
        if addr:
            self.rcpts.add(addr)

    parser = email.parser.Parser()
    # Second argument is ``headersonly``: the body is kept as one string.
    self.message = parser.parsestr(self.received() + body, True)

    # ``Message.has_key`` no longer exists on Python 3; the ``in``
    # operator performs the same case-insensitive header lookup on both
    # Python 2 and Python 3.
    has_from = "from" in self.message
    if efrom is None:
        if not has_from:
            default_from = config.get_general(str, "default_from")
            if default_from:
                self.message["From"] = default_from
        self.efrom = self.guess_envelope_from()
    else:
        self.efrom = efrom
        if not has_from:
            self.message["From"] = self.efrom

    if parse_rcpts:
        self.add_recipient_addresses()
    self.fix_headers()
def handle( to, sender, body ):
    """Parse an incoming mail and hand it to ``storage``.

    Text parts without a filename are concatenated into the content; every
    part with a filename is stored as an attachment and referenced by the
    id that ``storage.store_attachment`` returns.
    """
    mail = email.parser.Parser().parsestr( body )

    message = {}
    message['to'] = to
    message['sender'] = unicode(sender)
    message['subject'] = mail['subject']
    message['received'] = time.strftime( "%Y-%m-%d %H:%M:%S" )
    message['content'] = ""
    message['attachments'] = []

    for part in mail.walk():
        maintype = part.get_content_maintype()
        # Multipart containers carry no content of their own.
        if maintype == "multipart":
            continue

        filename = part.get_filename()
        if filename:
            payload = part.get_payload( decode = True )
            attachment = {}
            attachment['filename'] = filename
            attachment['type'] = part.get_content_type()
            attachment['payload-id'] = storage.store_attachment(
                payload, part.get_content_type() )
            message['attachments'].append( attachment )
        elif maintype == "text":
            message['content'] += part.get_payload( decode = False )

    storage.store_mail( message )
def getMessagesForCriteria(M, sender=None, subject=None):
    """Return the messages in ``M`` that match ``sender`` / ``subject``.

    ``M`` is an IMAP connection object offering ``search`` and ``fetch``.
    The search string is built by ``composeSearchString``. Every matching
    message is fetched as RFC822 text and parsed.

    Returns a list of parsed message objects.
    """
    # Find the ids of all matching messages.
    searchString = composeSearchString(sender, subject)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("search string: %s" % searchString)
    resp, data = M.search(None, searchString)
    message_ids = data[0].split()
    print("# matching messages: %s" % str(len(message_ids)))

    # Turn each fetched IMAP message string into a Message object.
    parser = email.parser.Parser()
    returnMessages = []
    for m_id in message_ids:
        resp, data = M.fetch(m_id, "(RFC822)")
        returnMessages.append(parser.parsestr(data[0][1]))
    return returnMessages
# Builds a Mailing from a multipart/alternative message (text/plain and
# text/html parts, both windows-1252 quoted-printable) and checks that the
# message re-parsed from mailing.header + mailing.body keeps its structure,
# content types, charsets and decoded payloads.
# NOTE(review): the fixture literal and the final assertIn literal are
# reproduced exactly as they appear here; their internal line breaks look
# altered, so confirm them against the upstream file before relying on them.
def test_create_mailing_from_message(self): parser = email.parser.Parser() msg = parser.parsestr("""Content-Transfer-Encoding: 7bit Content-Type: multipart/alternative; boundary="===============2840728917476054151==" Subject: Great news! From: Mailing Sender <*****@*****.**> To: <*****@*****.**> Date: Wed, 05 Jun 2013 06:05:56 -0000 This is a multi-part message in MIME format. --===============2840728917476054151== Content-Type: text/plain; charset="windows-1252" Content-Transfer-Encoding: quoted-printable This is a very simple mailing. I=92m happy. --===============2840728917476054151== Content-Type: text/html; charset="windows-1252" Content-Transfer-Encoding: quoted-printable <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> <html><head> <META http-equiv=3DContent-Type content=3D"text/html; charset=3Diso-8859-1"> </head> <body> This is <strong> a very simple</strong> <u>mailing</u>. = I=92m happy! Nothing else to say... </body></html> --===============2840728917476054151==-- """) mailing = Mailing.create_from_message(msg, mail_from='*****@*****.**', sender_name='Mailing Sender', scheduled_start=None, scheduled_duration=None) message = parser.parsestr(mailing.header + mailing.body) assert(isinstance(message, email.message.Message)) self.assertTrue(message.is_multipart()) self.assertEquals("multipart/alternative", message.get_content_type()) self.assertIsInstance(message.get_payload(i=0), email.message.Message) self.assertEquals("text/plain", message.get_payload(i=0).get_content_type()) self.assertEquals("windows-1252", message.get_payload(i=0).get_param('charset')) self.assertEquals("text/html", message.get_payload(i=1).get_content_type()) self.assertEquals("windows-1252", message.get_payload(i=1).get_param('charset')) self.assertEquals("This is a very simple mailing. I\x92m happy.", message.get_payload(i=0).get_payload(decode=True)) self.assertIn("This is <strong> a very simple</strong> <u>mailing</u>. I\x92m happy! 
", message.get_payload(i=1).get_payload(decode=True))
def decode_message(headers, raw_message):
    """Return the text of ``raw_message``, joining the parts of multiparts.

    For a ``multipart/*`` content type the stringified ``headers`` and the
    raw body are parsed together as a MIME message and the payloads of all
    top-level parts are concatenated. Any other content type is returned
    unchanged.
    """
    if not headers['content-type'].startswith('multipart/'):
        return raw_message

    decoded_reply = email.parser.Parser().parsestr(
        str(headers) + raw_message)
    return ''.join(
        part.get_payload() for part in decoded_reply.get_payload())
def listTasks(self, mailbox="INBOX", criterion="(ALL)"):
    """Return one ``"<from> : <subject>"`` line per matching message.

    The mailbox is selected with ``True`` as the second argument, then
    searched with ``criterion``; only the Subject and From header fields
    of each hit are fetched.
    """
    self.select(mailbox, True)
    typ, msgnums = self.search('UTF-8', criterion)

    header_parser = email.parser.HeaderParser()
    summaries = []
    for num in msgnums[0].split():
        typ, data = self.fetch(num, '(BODY[HEADER.FIELDS (SUBJECT FROM)])')
        msg = header_parser.parsestr(data[0][1])
        summaries.append(
            headerUnicode(msg['From']) + " : " + headerUnicode(msg['Subject']))
    return summaries
def name_extractor(path):
    """Yield ``(From, X-From)`` header pairs for every file under ``path``.

    Each visited directory is printed. Files are read as Latin-1 text and
    parsed as e-mail messages; a missing header yields ``None``.
    """
    parser = email.parser.Parser()
    for root, _, files in os.walk(path):
        print(root)
        for name in files:
            file_path = os.path.join(root, name)
            with codecs.open(file_path, 'r', 'Latin-1') as handle:
                message = parser.parsestr(handle.read())
            yield (message['From'], message['X-From'])
def getHeaders(self, conn, imapid):
    """Return the parsed headers of message ``imapid``.

    Fetches ``BODY[HEADER]`` through ``conn``, converts the returned byte
    values to characters one by one and parses the result as headers.

    Raises:
        RuntimeError: if the fetch status is not ``'OK'``.
    """
    status, data = conn.fetch(imapid, '(BODY[HEADER])')
    if status != 'OK':
        raise RuntimeError('Unvalid reply: ' + status)

    raw = data[0][1]
    text = "".join(chr(byte) for byte in raw)
    return email.parser.HeaderParser().parsestr(text)
def parse_origin_packages(packages_text):
    """Parse a ``Packages`` index into a list of message objects.

    The index consists of stanzas of ``Key: value`` lines separated by
    blank lines. Each stanza is parsed as a set of RFC 822 headers.

    The last stanza is returned even when the text does not end with a
    blank line, and runs of blank lines do not produce empty entries.

    Params:
        packages_text: the complete index as one string.

    Returns:
        a list with one parsed message per stanza, in file order.
    """
    parser = email.parser.Parser()
    packages = []
    stanza = []
    # The trailing "" acts as a sentinel so the final stanza is flushed too.
    for line in packages_text.splitlines() + [""]:
        if line != "":
            stanza.append(line)
        elif stanza:
            packages.append(parser.parsestr("\n".join(stanza) + "\n"))
            stanza = []
    return packages
def fetch(self, folder='Inbox'):
    """Fetch and parse all UNSEEN messages of ``folder``.

    Returns a list of ``(id, message)`` tuples, where ``id`` is the first
    token of the fetch response line and ``message`` the parsed RFC822
    text. The mailbox is closed before returning.
    """
    # NOTE: open question carried over from the original author -
    # where do we mark messages as seen?
    parser = email.parser.Parser()
    self.imap.select(folder)
    status, found = self.imap.search(None, 'UNSEEN')

    # The search result is space separated; fetch wants a comma list.
    id_set = found[0].replace(' ', ',')
    data = []
    if id_set:
        status, data = self.imap.fetch(id_set, '(RFC822)')
    self.imap.close()

    # Every other element of the response is an (envelope, text) pair.
    return [
        (envelope.split()[0], parser.parsestr(raw))
        for (envelope, raw) in data[::2]
    ]
def bulk_fetch(uids, fields, chunk_size=100, raw_message=False):
    """Fetch a bunch of messages and make them available as a generator.

    ``uids`` are fetched in chunks of ``chunk_size`` through the
    module-level ``mail`` connection, requesting ``fields``.

    Yields ``(uid, message)`` tuples. When ``raw_message`` is true only the
    headers are parsed and the tuple is ``(uid, headers, raw_text)``.

    Raises:
        Exception: if the server does not answer a fetch with ``OK``.
    """
    parser = email.parser.Parser()
    for uid_chunk, pos in chunker(uids, chunk_size):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Fetching chunk %s" % pos)
        # The status used to be bound to ``type`` but tested as ``typ``,
        # which raised NameError on the first chunk.
        typ, data = mail.uid("fetch", ",".join(uid_chunk), fields)
        if typ != "OK":
            raise Exception("Failed to retrieve messsages")

        # Check each message in this batch.
        for msg in data:
            # Response items without a UID marker are skipped.
            if "(UID " not in msg[0]:
                continue
            uid = msg[0].split(" ", 4)[2]

            if raw_message:
                # Parse only the headers; hand back the full text as well.
                yield uid, parser.parsestr(msg[1], True), msg[1]
            else:
                yield uid, parser.parsestr(msg[1])
def notify(ui, account):
    """Send desktop notifications for the new messages of ``account``.

    When the number of new messages exceeds ``conf['max']`` a single digest
    notification is sent; otherwise one notification is sent per message,
    formatted from ``conf['summary']`` and ``conf['body']``.

    NOTE(review): the nesting below was reconstructed from a flattened
    source; the ``else`` is assumed to belong to the ``for part`` loop.
    """
    encoding = locale.getpreferredencoding(False)
    account_name = account.getname().decode(encoding)
    conf = get_config(ui)
    notify_send = functools.partial(send_notification, ui, conf)
    summary_formatter = MailNotificationFormatter(escape=False, failstr=conf['failstr'])
    body_formatter = MailNotificationFormatter(escape=True, failstr=conf['failstr'])
    # First pass: count the new messages and prepare one digest line per
    # folder.
    count = 0
    body = []
    for folder, contents in ui.new_messages[account].iteritems():
        count += len(contents)
        body.append(
            body_formatter.format(conf['digest-body'], count=len(contents), folder=folder))
    # Too many messages: send one digest instead of individual popups.
    if count > conf['max']:
        summary = summary_formatter.format(conf['digest-summary'], count=count, account=account_name)
        return notify_send(summary, '\n'.join(body))
    # The message body is only parsed when a format string refers to it.
    need_body = '{body' in conf['body'] or '{body' in conf['summary']
    parser = email.parser.Parser()
    for folder, contents in ui.new_messages[account].iteritems():
        format_args = { 'account': account_name, 'folder': folder.decode(encoding) }
        for content in contents:
            message = parser.parsestr(content.get('message'), headersonly=not need_body)
            format_args['h'] = HeaderDecoder(message, failstr=conf['failstr'])
            if need_body:
                # Use the first text/plain part as the body.
                for part in message.walk():
                    if part.get_content_type() == 'text/plain':
                        charset = part.get_content_charset()
                        payload = part.get_payload(decode=True)
                        format_args['body'] = payload.decode(charset)
                        break
                else:
                    # No text/plain part was found.
                    format_args['body'] = conf['failstr']
            try:
                notify_send(
                    summary_formatter.vformat(conf['summary'], (), format_args),
                    body_formatter.vformat(conf['body'], (), format_args))
            except (AttributeError, KeyError, TypeError, ValueError) as exc:
                # A broken user-supplied format string is reported, not
                # raised.
                ui.error(exc, msg='In notification format specification')
def mboxo_generator(input, parser=email.parser.Parser()):
    '''Yield each message found in ``input`` in ``mboxo`` / ``mboxrd``
    format.

    ``input`` is any iterable of text lines, such as an open file. A line
    starting with ``From `` begins a new message; an empty string is
    treated as end of input. The final message is yielded as well.

    ``parser`` is the object whose ``parsestr`` turns the collected text
    into a message. The default instance is shared between calls.
    '''
    data = []
    for line in input:
        # An empty string (as returned by readline at EOF) ends the input.
        if line == '':
            break
        if line[:5] == 'From ' and data:
            yield parser.parsestr(''.join(data))
            data = []
        data.append(line)
    # Flush the last message, which is not followed by a "From " line.
    if data:
        yield parser.parsestr(''.join(data))
def send_email(content, smtp_server = config.email_smtp_server, verbose = False):
    """Send the raw message ``content`` through ``smtp_server``.

    The envelope sender and recipient are taken from the ``From`` and
    ``To`` headers of the message itself. The SMTP connection is closed
    again whether or not sending succeeds.
    """
    s = smtplib.SMTP(smtp_server)
    try:
        # get the envelope From and To by parsing the message
        parsed_msg = email.parser.Parser().parsestr(content)
        from_addr = _get_address(parsed_msg.get("From"))
        to_addr = _get_address(parsed_msg.get("To"))

        util.call_verbose("Sending email", verbose, s.sendmail, from_addr, to_addr, content)
    finally:
        # Always release the connection; it used to be left open.
        try:
            s.quit()
        except smtplib.SMTPServerDisconnected:
            # The server already dropped the connection.
            pass
def __init__(self, fromlines=None, fromstring=None, fromfile=None):
    """Build a message from exactly one of the three sources.

    ``fromlines`` is used for POP3 and ``fromstring`` for IMAP; both can
    be badly corrupted or invalid (spam, worms, ...). ``fromfile`` is used
    for the output of filters and similar, which should be saner. A source
    that fails to parse is replaced by ``corrupt_message(...)``.

    Raises:
        SystemExit: if none of the three sources is given.
    """
    #self.log = Logger()
    self.recipient = None
    self.received_by = None
    self.received_from = None
    self.received_with = None
    self.__raw = None
    parser = email.parser.Parser()

    if fromlines:
        joined = os.linesep.join(fromlines)
        try:
            self.__msg = parser.parsestr(joined)
        except email.errors.MessageError as o:
            self.__msg = corrupt_message(o, fromlines=fromlines)
        self.__raw = os.linesep.join(fromlines)
    elif fromstring:
        try:
            self.__msg = parser.parsestr(fromstring)
        except email.errors.MessageError as o:
            self.__msg = corrupt_message(o, fromstring=fromstring)
        self.__raw = fromstring
    elif fromfile:
        # Only used by getmail_maildir, getmail_mbox and when reading the
        # output of a filter, so __raw is left unset here.
        try:
            self.__msg = parser.parse(fromfile)
        except email.errors.MessageError as o:
            # Shouldn't happen
            self.__msg = corrupt_message(o, fromstring=fromfile.read())
    else:
        # Can't happen?
        raise SystemExit('Message() called with wrong arguments')

    return_path = self.__msg['return-path'] or 'unknown'
    self.sender = address_no_brackets(return_path)
# Builds a Mailing from a message whose From header carries an RFC 2047
# encoded display name and checks that the From header of the re-parsed
# mailing decodes to the expected unicode string.
# NOTE(review): the fixture literal is reproduced exactly as it appears
# here; its internal line breaks look altered, so confirm it against the
# upstream file before relying on it.
def test_create_mailing_from_message_with_encoded_headers(self): parser = email.parser.Parser() msg = parser.parsestr("""Content-Transfer-Encoding: 7bit Content-Type: multipart/alternative; boundary="===============2840728917476054151==" Subject: Great news! From: =?UTF-8?B?Q2VkcmljIFJJQ0FSRA==?= <*****@*****.**> To: <*****@*****.**> Date: Wed, 05 Jun 2013 06:05:56 -0000 This is a multi-part message in MIME format. --===============2840728917476054151== Content-Type: text/plain; charset="windows-1252" Content-Transfer-Encoding: quoted-printable This is a very simple mailing. I=92m happy. --===============2840728917476054151== Content-Type: text/html; charset="windows-1252" Content-Transfer-Encoding: quoted-printable <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> <html><head> <META http-equiv=3DContent-Type content=3D"text/html; charset=3Diso-8859-1"> </head> <body> This is <strong> a very simple</strong> <u>mailing</u>. = I=92m happy! Nothing else to say... </body></html> --===============2840728917476054151==-- """) mailing = Mailing.create_from_message(msg, scheduled_start=None, scheduled_duration=None) message = parser.parsestr(mailing.header + mailing.body) assert(isinstance(message, email.message.Message)) mail_from = header_to_unicode(message.get("From")) self.assertEquals(u"Cedric RICARD <*****@*****.**>", mail_from)
def process(self, peer, mailfrom, rcpttos, data, **params):
    """Hand every attachment of the mail in ``data`` to
    ``self.process_attachment``.

    A part counts as an attachment when it is not a multipart container
    and has a filename. The decoded payload, the filename and any extra
    keyword arguments are passed on.
    """
    msgobj = email.parser.Parser().parsestr(data)
    for part in msgobj.walk():
        # Multipart parts are just containers.
        if part.is_multipart():
            continue
        filename = part.get_filename()
        # No filename means the part is not an attachment.
        if filename:
            self.process_attachment(
                part.get_payload(decode=True), filename, **params)
def parse_origin_packages(uri, version, target_name):
    """Download and parse the ``Packages`` index of a target.

    The index is fetched from ``<target url>/packages/Packages``, where
    the target url comes from ``get_target_url(uri, version, target_name)``.
    It consists of stanzas of ``Key: value`` lines separated by blank
    lines; each stanza is parsed as a set of RFC 822 headers.

    The last stanza is returned even when the index does not end with a
    blank line, and runs of blank lines do not produce empty entries.

    Returns:
        a list with one parsed message per stanza, in file order.
    """
    index_url = get_target_url(uri, version, target_name) + "/packages/Packages"
    index_text = urlopen(index_url).read().decode()

    parser = email.parser.Parser()
    packages = []
    stanza = []
    # The trailing "" acts as a sentinel so the final stanza is flushed too.
    for line in index_text.splitlines() + [""]:
        if line != "":
            stanza.append(line)
        elif stanza:
            packages.append(parser.parsestr("\n".join(stanza) + "\n"))
            stanza = []
    return packages
def read_message(path):
    """Read an Enron message file into a ``Message`` tuple.

    The file is read as Latin-1 text. The recipients are the
    whitespace-separated tokens of the ``To`` header with the commas
    stripped, or an empty tuple when the header is absent.
    """
    with codecs.open(path, 'r', 'Latin-1') as handle:
        text = handle.read()
    message = email.parser.Parser().parsestr(text)

    to_header = message['To']
    if to_header is None:
        recipients = ()
    else:
        recipients = tuple(token.strip(',') for token in to_header.split())

    sent_at = dateutil.parser.parse(message['Date'])
    return Message(message['From'], recipients, sent_at,
                   message['Subject'], message.get_payload())
def send(self, fromaddr, toaddrs, message):
    """Queue ``message`` for delivery within the current transaction.

    An existing ``Message-Id`` header is reused; otherwise a new id is
    generated and prepended to the message as a header. A data manager
    for the delivery is joined to the current transaction.

    Returns the message id without the surrounding angle brackets.

    Raises:
        ValueError: if an existing Message-Id is not wrapped in ``<...>``.
    """
    parsed = email.parser.Parser().parsestr(message)
    messageid = parsed.get('Message-Id')

    if not messageid:
        messageid = self.newMessageId()
        message = 'Message-Id: <%s>\n%s' % (messageid, message)
    else:
        if not (messageid.startswith('<') and messageid.endswith('>')):
            raise ValueError('Malformed Message-Id header')
        messageid = messageid[1:-1]

    manager = self.createDataManager(fromaddr, toaddrs, message)
    transaction.get().join(manager)
    return messageid
def reademl():
    """Ask for the path of an .eml file and print its headers.

    Every header is printed as ``name value`` on its own line, framed by
    blank lines. Any failure while reading or parsing is reported through
    ``error`` instead of being raised.
    """
    filepath = input("\nEnter file path: ")
    try:
        # The context manager closes the file even if parsing fails.
        with open(filepath, "r") as emlFile:
            msg = email.message_from_file(emlFile)
        header = email.parser.HeaderParser().parsestr(msg.as_string())
        print()
        for h in header.items():
            print(*h)
        print()
    except Exception:
        # Deliberately broad, but KeyboardInterrupt and SystemExit are no
        # longer swallowed.
        error("Bad file | Encoding error")
def git_am_patch_split(f, encoding=None):
    """Parse a git-am-style patch and split it up into bits.

    :param f: File-like object to parse
    :param encoding: Encoding to use when creating Git objects; defaults
        to ``f.encoding`` when present, else ``"ascii"``
    :return: Tuple with commit object, diff contents and git version
    """
    encoding = encoding or getattr(f, "encoding", "ascii")
    contents = f.read()
    # isinstance also accepts bytes subclasses. BytesParser is looked up
    # dynamically because it is not available on every Python version.
    bytes_parser = getattr(email.parser, "BytesParser", None)
    if isinstance(contents, bytes) and bytes_parser:
        msg = bytes_parser().parsebytes(contents)
    else:
        msg = email.parser.Parser().parsestr(contents)
    return parse_patch_message(msg, encoding)
def parse_origin_packages():
    """Download the origin ``Packages`` index and parse it.

    The index is downloaded to ``rebuild_path / "Packages"`` and read back
    from there. It consists of stanzas of ``Key: value`` lines separated
    by blank lines; each stanza is parsed as a set of RFC 822 headers.

    The last stanza is included even when the file does not end with a
    blank line, and runs of blank lines do not produce empty entries.

    Returns:
        a dict mapping each stanza's ``Filename`` value to its parsed
        message.
    """
    get_file(
        f"{origin_url}/{target_dir}/packages/Packages",
        rebuild_path / "Packages",
    )

    parser = email.parser.Parser()
    packages = {}
    stanza = []
    index_lines = (rebuild_path / "Packages").read_text().splitlines()
    # The trailing "" acts as a sentinel so the final stanza is flushed too.
    for line in index_lines + [""]:
        if line != "":
            stanza.append(line)
        elif stanza:
            package = parser.parsestr("\n".join(stanza) + "\n")
            packages[package["Filename"]] = package
            stanza = []
    return packages
def test_dkim(self):
    # A mailing configured with DKIM must produce a customized message
    # that carries a DKIM-Signature header which verifies.
    dkim_settings = {
        'selector': 'mail',
        'domain': 'unittest.cloud-mailing.net',
        'privkey': self._get_dkim_privkey(),
    }
    mailing = factories.MailingFactory(dkim=dkim_settings)
    recipient = factories.RecipientFactory(mailing=mailing)

    message_str = self._customize(recipient)
    self.assertNotIn(b"\r\n", message_str)

    message = email.parser.Parser().parsestr(message_str, headersonly=False)
    assert isinstance(message, email.message.Message)
    self.assertTrue('DKIM-Signature' in message)

    verified = dkim.verify(message_str, dnsfunc=self._get_txt)
    self.assertTrue(verified)
def git_am_patch_split(f, encoding=None):
    """Parse a git-am-style patch and split it up into bits.

    :param f: File-like object to parse
    :param encoding: Encoding to use when creating Git objects; defaults
        to ``f.encoding`` when present, else ``"ascii"``
    :return: Tuple with commit object, diff contents and git version
    """
    if not encoding:
        encoding = getattr(f, "encoding", "ascii")
    contents = f.read()

    # Bytes input goes through BytesParser where that class exists;
    # everything else is parsed as text.
    use_bytes_parser = (
        isinstance(contents, bytes)
        and getattr(email.parser, "BytesParser", None)
    )
    if use_bytes_parser:
        msg = email.parser.BytesParser().parsebytes(contents)
    else:
        msg = email.parser.Parser().parsestr(contents)
    return parse_patch_message(msg, encoding)
def extractHeaderTuples(self, uid):
    """Return ``(header, decoded value)`` tuples for message ``uid``.

    Only the From and Subject header fields are fetched, without marking
    the message as seen (BODY.PEEK). Encoded words are decoded and
    re-encoded as UTF-8; the chunks of one header are joined with spaces.
    """
    self.logger.info('Getting header information for {0:d}'.format(uid))
    rc, data = self.M.uid('FETCH', uid, '(BODY.PEEK[HEADER.FIELDS (FROM SUBJECT)] UID)')
    headers = email.parser.HeaderParser().parsestr(data[0][1])

    filterValues = []
    for header, value in headers.items():
        chunks = [
            element if charset is None
            else element.decode(charset).encode('utf-8')
            for element, charset in email.header.decode_header(value)
        ]
        filterValues.append((header, ' '.join(chunks)))

    self.logger.debug('Extracted headers for uid {0:d} - {1}'.format(uid, filterValues))
    return filterValues
def metadata(self):
    """Return the parsed METADATA of the wheel, loading it on first use.

    The METADATA file inside the wheel's dist-info directory is read,
    decoded as ASCII (undecodable bytes are dropped) and parsed with the
    email parser. The result is cached in ``self._metadata``.
    """
    if self._metadata is not None:
        return self._metadata

    # Why email.parser? PEP 314 defines PKG-INFO as a single set of
    # RFC-822 headers parseable by the rfc822 module, and that module has
    # been deprecated since Python 2.3 in favor of the email package.
    with zipfile.ZipFile(self.path(), 'r') as whl:
        with whl.open(self._dist_info() + '/METADATA') as f:
            text = f.read().decode('ascii', 'ignore')

    self._metadata = email.parser.Parser().parsestr(text)
    return self._metadata
def email_extractor(path):
    """Yield the e-mail addresses found in the sent folders under ``path``.

    Only directories whose path ends in ``sent_items``, ``sent`` or
    ``sent_mail`` are read; each of them is printed. For every file the
    ``From`` header is yielded (``None`` when absent), followed by each
    address token of the ``To`` header.
    """
    sent_dir_patterns = [
        re.compile(r) for r in ["sent_items$", "sent$", "sent_mail$"]]
    parser = email.parser.Parser()

    for root, _, files in os.walk(path):
        if not any(pattern.search(root) for pattern in sent_dir_patterns):
            continue
        print(root)
        for name in files:
            file_path = os.path.join(root, name)
            with codecs.open(file_path, "r", "Latin-1") as handle:
                message = parser.parsestr(handle.read())

            yield message["From"]
            to_header = message["To"]
            if to_header is None:
                continue
            # Commas become separators, so every token is one address.
            for address in to_header.replace(",", " ").split():
                yield address
def email_extractor(path):
    """Yield the e-mail addresses found in the sent folders under ``path``.

    Only directories whose path ends in ``sent_items``, ``sent`` or
    ``sent_mail`` are read; each of them is printed. For every file the
    ``From`` header is yielded (``None`` when absent), followed by each
    address token of the ``To`` header.
    """
    parser = email.parser.Parser()
    outbox_patterns = [
        re.compile(r) for r in ['sent_items$', 'sent$', 'sent_mail$']]

    for root, _, files in os.walk(path):
        is_outbox = any(p.search(root) for p in outbox_patterns)
        if not is_outbox:
            continue
        print(root)
        for file_name in files:
            with codecs.open(os.path.join(root, file_name), 'r', 'Latin-1') as fh:
                content = fh.read()
            message = parser.parsestr(content)

            yield message['From']
            recipients = message['To']
            if recipients is not None:
                # Commas become separators, so every token is one address.
                for token in recipients.replace(',', ' ').split():
                    yield token
def map(event):
    """Count (sender, recipient) pairs for one mapper job.

    ``event`` is an SNS notification whose JSON message names the S3
    object to process together with ``job_id``, ``run_id`` and
    ``total_jobs``. Every row's ``message`` field is parsed as e-mail
    headers; each address in ``To`` is paired with the ``From`` value and
    counted. Rows without a ``To`` header are skipped.

    The counts are written as CSV to the bucket named by the
    ``REDUCE_RESULTS_BUCKET`` environment variable. Nothing is written
    when no pair was counted.
    """
    message = json.loads(event['Records'][0]['Sns']['Message'])

    total_jobs = message['total_jobs']
    run_id = message['run_id']
    job_id = message['job_id']

    # The destination always comes from the environment; the hard-coded
    # bucket name that used to precede this line was dead code.
    bucket = os.environ['REDUCE_RESULTS_BUCKET']

    tmp_file = download_from_s3(message['bucket'], message['key'])

    parser = email.parser.Parser()
    counts = {}
    for line in _csv_lines_from_filepath(tmp_file):
        eml = parser.parsestr(line['message'], headersonly=True)
        _from = eml['From']
        _tos = eml.get('To')
        if not _tos:
            continue

        for recipient in _tos.split(','):
            from_to = (_from, recipient.strip())
            counts[from_to] = counts.get(from_to, 0) + 1

    if not counts:
        return

    metadata = {
        'job_id': str(job_id),
        'run_id': str(run_id),
        'total_jobs': str(total_jobs),
    }

    key = 'run-%s/mapper-%s-done.csv' % (run_id, job_id)
    write_csv_to_s3(bucket, key, counts, Metadata=metadata)
def headers(self):
    """Return the response headers as a dict, parsing them on first use.

    ``self.raw_headers`` (bytes) is parsed once and the result is cached
    in ``self._headers``. On Python 2 the legacy ``httplib.HTTPMessage``
    reader is used; on Python 3, where that constructor rejects the
    ``seekable`` argument, the headers are decoded as ISO-8859-1 and
    parsed with the email parser.

    Raises:
        TypeError: re-raised when it is not caused by the unsupported
            ``seekable`` keyword.
    """
    if self._headers is None:
        logger.debug("raw headers: " + repr(self.raw_headers))
        headers_buffer = BytesIO(self.raw_headers)
        try:
            # py 2
            # seekable has to be 0, otherwise it won't parse anything
            m = httplib.HTTPMessage(headers_buffer, seekable=0)
            m.readheaders()
            self._headers = m.dict
        except TypeError as ex:
            # py 3: recognise the error by the offending keyword only. The
            # exact wording differs between Python versions (newer ones
            # prefix it with the qualified name), so an exact match on the
            # message would re-raise instead of falling back.
            if "seekable" not in str(ex):
                raise
            parser = email.parser.Parser()
            m = parser.parsestr(self.raw_headers.decode('iso-8859-1'))
            self._headers = dict(m.items())
    return self._headers
def test_dkim_and_feedback_loop(self):
    # With both DKIM and a feedback loop configured, the customized
    # message must carry a Feedback-ID header and two DKIM signatures,
    # each of which verifies.
    privkey = self._get_dkim_privkey()
    dkim_settings = {
        'selector': 'mail',
        'domain': 'unittest.cloud-mailing.net',
        'privkey': privkey,
    }
    feedback_settings = {
        'dkim': {
            'selector': 'mail',
            'domain': 'unittest.cloud-mailing.net',
            'privkey': privkey,
        },
        'sender_id': 'CloudMailing',
    }
    mailing = factories.MailingFactory(
        dkim=dkim_settings, feedback_loop=feedback_settings)
    recipient = factories.RecipientFactory(mailing=mailing)

    message_str = self._customize(recipient)
    self.assertNotIn(b"\r\n", message_str)

    message = email.parser.Parser().parsestr(message_str, headersonly=False)
    assert isinstance(message, email.message.Message)
    self.assertTrue('Feedback-ID' in message)
    self.assertEqual(2, len(message.get_all('DKIM-Signature')))

    d = dkim.DKIM(message_str)
    for index in (0, 1):
        self.assertTrue(d.verify(index, dnsfunc=self._get_txt))
def test_feedback_loop(self):
    """A feedback-loop-only mailing gets a Feedback-ID header and a valid DKIM signature.

    The ``Feedback-ID`` value must be
    ``<mailing.id>:cloud-mailing.net:<mailing.type>:CloudMailing`` and the
    message must verify with ``dkim.verify`` using ``self._get_txt`` for DNS.
    """
    privkey = self._get_dkim_privkey()
    mailing = factories.MailingFactory(
        feedback_loop={
            'dkim': {'selector': 'mail', 'domain': 'unittest.cloud-mailing.net', 'privkey': privkey},
            'sender_id': 'CloudMailing',
        },
        domain_name='cloud-mailing.net',
    )
    recipient = factories.RecipientFactory(mailing=mailing)

    message_str = self._customize(recipient)

    self.assertNotIn(b"\r\n", message_str)

    parser = email.parser.Parser()
    message = parser.parsestr(message_str, headersonly=False)
    # Use the TestCase helpers rather than a bare ``assert`` (which is
    # stripped under ``python -O``) so failures are always reported.
    self.assertIsInstance(message, email.message.Message)
    self.assertIn('Feedback-ID', message)
    self.assertIn('DKIM-Signature', message)
    self.assertEqual(
        '%d:cloud-mailing.net:%s:CloudMailing' % (mailing.id, mailing.type),
        message['Feedback-ID'],
    )

    self.assertTrue(dkim.verify(message_str, dnsfunc=self._get_txt))
def __init__(self, string, id=None):
    """Parse an e-mail string and give the message an identifier.

    The text is parsed with the module-level ``parser`` object, which is
    shared between instances so that a parser is not created for every
    message.

    :param string: raw e-mail text; kept on ``self.string`` and parsed into
        ``self.parsed``.
    :param id: identifier to store as ``self.uuid``. When ``None`` a fresh
        ``uuid.uuid1()`` is generated and its hex form is stored instead.

    Any exception raised by ``parser.parsestr`` (for example
    ``UnicodeEncodeError``) propagates to the caller unchanged.
    """
    self.string = string
    self.parsed = parser.parsestr(string)

    # The identifier is created early so it is available before any
    # further processing of the message.
    # NOTE(review): an earlier comment here claimed that every generated
    # uuid was the same. Version-1 UUIDs made on one host share their
    # trailing clock-sequence/node fields and differ only in the leading
    # timestamp fields, so they look alike at a glance -- confirm whether
    # that explains the report before switching generators.
    if id is None:
        self.uuid = uuid.uuid1().hex
    else:
        self.uuid = id
def extractHeaderTuples(self, uid):
    """Fetch the From/Subject headers of message ``uid`` as decoded pairs.

    Issues a ``UID FETCH`` that peeks at the ``FROM`` and ``SUBJECT`` header
    fields (so the request itself does not flag the message as read), parses
    the returned header block, and decodes every header value with
    ``email.header.decode_header``. Fragments that carry a charset are
    re-encoded to UTF-8; the fragments of one header are joined with a
    single space.

    Returns a list of ``(header_name, decoded_value)`` tuples in the order
    the headers appear in the response.
    """
    self.logger.info('Getting header information for {0:d}'.format(uid))

    header_parser = email.parser.HeaderParser()
    _status, data = self.M.uid(
        'FETCH', uid, '(BODY.PEEK[HEADER.FIELDS (FROM SUBJECT)] UID)')
    parsed = header_parser.parsestr(data[0][1])

    header_pairs = []
    for name, raw_value in parsed.items():
        fragments = [
            text if charset is None else text.decode(charset).encode('utf-8')
            for text, charset in email.header.decode_header(raw_value)
        ]
        header_pairs.append((name, ' '.join(fragments)))

    self.logger.debug('Extracted headers for uid {0:d} - {1}'.format(
        uid, header_pairs))
    return header_pairs
def searchMessageSubjects(self, term=None):
    """Search the selected mailbox for subjects matching ``term``.

    The mailbox is selected read-only, a search criterion is built with
    ``self.buildSearchTerm("Subject", term)``, and for every hit the message
    header is fetched and a ``#<id> [<from>] -> [<subject>]`` line is
    printed.

    :param term: text to look for; a falsy value short-circuits the search.
    :returns: list of the matching message identifiers as returned by the
        server, in order, without duplicates. An empty list when there is
        no server connection (``self.srv`` is falsy) or no ``term``.
    """
    if not self.srv or not term:
        return []

    matched = []
    self.srv.select(readonly=True)
    search_term = self.buildSearchTerm("Subject", term)
    typ, data = self.srv.search(None, search_term)

    # One parser is enough; parsestr() keeps no state between calls.
    parser = email.parser.HeaderParser()
    for uid in data[0].split():
        header = self.srv.fetch(uid, '(BODY[HEADER])')
        if header:
            header_data = header[1][0][1]
            msg = parser.parsestr(header_data)
            # Single-argument call form: valid as a Python 2 print
            # statement and as the Python 3 print function.
            print("#%s [%s] -> [%s]" % (uid, msg['from'], msg['subject']))
            if uid not in matched:
                matched.append(uid)
    return matched
def getXsubjects(self, num=10):
    """Print sender and subject of up to ``num`` messages.

    Messages are taken in the order produced by
    ``self.getMessagesReverseOrder()``; for each one the header is fetched
    and a ``#<n> [<from>] -> [<subject>]`` line is printed, where ``<n>``
    counts from 1.

    :param num: maximum number of messages to list. It is capped at the
        message count reported by ``select()`` when that count can be read
        as an integer.
    :returns: always ``None``; returns immediately when there is no server
        connection (``self.srv`` is falsy).
    """
    if not self.srv:
        return

    # select() reports the message count as text, so it has to be converted
    # before it can be compared with ``num``.
    try:
        numMessages = int(self.srv.select(readonly=True)[1][0])
    except (TypeError, ValueError):
        # Count unavailable (e.g. an error payload): do not cap.
        numMessages = num

    typ, data = self.getMessagesReverseOrder()
    maxNum = min(num, numMessages)

    # One parser is enough; parsestr() keeps no state between calls.
    parser = email.parser.HeaderParser()
    i = 1
    # A distinct loop name keeps the ``num`` parameter intact.
    for msg_num in data[0].split():
        header = self.srv.fetch(msg_num, '(BODY[HEADER])')
        if header:
            header_data = header[1][0][1]
            msg = parser.parsestr(header_data)
            # Single-argument call form: valid as a Python 2 print
            # statement and as the Python 3 print function.
            print("#%i [%s] -> [%s]" % (i, msg['from'], msg['subject']))
            i = i + 1
            if i > maxNum:
                return
    return None
def dosync(self):
    """Announce every unseen message to the notifier's channels.

    Unseen messages are listed with a server-side ``SORT`` by date; if that
    call raises, a plain ``SEARCH`` is used instead. Each message is fetched
    in full, its headers are parsed, and one notice text is built by
    concatenating ``self.format_header(header, msg)`` for every header named
    in ``self.notifier.headers``. The text is sent to each entry of
    ``self.notifier.noticed`` (only the part before the first space of the
    entry is used as the target).

    After five notices have been sent, every further notice is followed by
    a two-second pause as crude flood protection.

    When the listing status is not ``"OK"`` the status is printed.
    """
    try:
        status, result = self.M.sort("DATE", "UTF-8", "UNSEEN")
    except Exception:
        # Fall back to an unsorted search when SORT fails. Only ordinary
        # errors are caught, so KeyboardInterrupt/SystemExit still
        # propagate.
        status, result = self.M.search("UTF-8", "UNSEEN")

    if status == "OK" and result and result[0]:
        # One parser is enough; parsestr() keeps no state between calls.
        parser = email.parser.HeaderParser()
        flood_excess = 0
        for msg_id in result[0].split():
            data = self.M.fetch(msg_id, "(RFC822)")
            # If the first response item itself starts with the message id,
            # the message text is taken from the second item instead.
            if data[1][0][0:len(msg_id)] == msg_id:
                header_data = data[1][1][1]
            else:
                header_data = data[1][0][1]
            parsed = parser.parsestr(header_data)
            text = "".join([self.format_header(header, parsed)
                            for header in self.notifier.headers])
            for chan in self.notifier.noticed:
                self.notifier.notice(chan.split(" ", 1)[0], text)
                flood_excess += 1
                if flood_excess >= 5:
                    time.sleep(2)
    elif status != "OK":
        print(status)
def asMIMEText(self, num):
    """Fetch message ``num`` in full and return it as a parsed message object.

    The complete message (``RFC822``) is requested through ``self.fetch``;
    the text found at ``[0][1]`` of the returned data is parsed with
    ``email.parser.Parser``.
    """
    _typ, response = self.fetch(num, '(RFC822)')
    raw_message = response[0][1]
    return email.parser.Parser().parsestr(raw_message)
async def parse(self):
    """Read and parse one HTTP request line plus headers from ``self.reader``.

    On success the method sets ``requestline``, ``method``, ``path``,
    ``headers``, ``port`` (and ``hostname`` when the Host header carries a
    numeric port), ``_accept`` and ``_accept_encoding``; it may lower
    ``protocol_version``, updates ``keep_alive``, and fills ``self.env``
    with CGI-style keys (``SERVER_PROTOCOL``, ``REQUEST_METHOD``,
    ``CONTENT_TYPE``, ``CONTENT_LENGTH``, ``REQUEST_URI`` and one
    ``HTTP_<NAME>`` entry per remaining header; repeated headers are joined
    with a comma).

    Every read is bounded by ``self.keep_alive_timeout``.

    Returns:
        ``True`` when a request was parsed, ``None`` when the peer sent
        nothing or only a blank line.

    Raises:
        AssertionError: malformed request line or version token.
        errors.HTTPError: 505 for HTTP/2.0 or newer, 400 for an over-long
            header line.
        asyncio.TimeoutError: a read exceeded the keep-alive timeout.
    """
    requestline = await asyncio.wait_for(self.reader.readline(), self.keep_alive_timeout)
    if not requestline:
        return

    self.requestline = requestline.strip().decode()
    if not self.requestline:
        return

    # NOTE: the asserts are kept as-is so the exception type seen by
    # existing callers does not change; be aware they vanish under -O.
    words = self.requestline.split(' ')
    assert len(words) == 3, 'Bad request syntax (%r)' % self.requestline
    self.method, self.path, version = words
    assert version.startswith('HTTP/'), 'Bad request version (%r)' % version
    version_number = version[5:].split('.')
    assert len(version_number) == 2, 'Bad request version (%r)' % version
    protocol_version = tuple(map(int, version_number))

    if protocol_version >= (2, 0):
        raise errors.HTTPError(505, "Invalid HTTP Version (%s)" % version)
    if protocol_version >= (1, 1):
        self.keep_alive = True
    # Never answer with a newer protocol version than the client speaks.
    if protocol_version < self.protocol_version:
        self.protocol_version = protocol_version

    # Collect raw header lines up to the blank line that ends the header
    # section.
    header_lines = []
    while True:
        line = await asyncio.wait_for(self.reader.readline(), self.keep_alive_timeout)
        if not line.strip():
            break
        header_lines.append(line.decode())

    try:
        parser = email.parser.Parser(_class=http.client.HTTPMessage)
        self.headers = parser.parsestr(''.join(header_lines))
    except http.client.LineTooLong:
        raise errors.HTTPError(400, "Line too long")

    # Examine the headers and look for a Connection directive.
    conntype = self.headers.get('Connection', "")
    if conntype.lower() == 'close':
        self.keep_alive = False
    elif conntype.lower() == 'keep-alive' and protocol_version >= (1, 1):
        self.keep_alive = True

    self.env['SERVER_PROTOCOL'] = 'HTTP/%d.%d' % protocol_version
    self.env['REQUEST_METHOD'] = self.method
    self.env['CONTENT_TYPE'] = self.headers.get('content-type')
    self.env['CONTENT_LENGTH'] = self.headers.get('content-length')

    for key, value in self.headers.items():
        key = key.replace('-', '_').upper()
        # Headers that already have an un-prefixed entry (e.g. CONTENT_TYPE)
        # are not duplicated under HTTP_*.
        if key in self.env:
            continue
        key = 'HTTP_' + key
        value = value.strip()
        oldvalue = self.env.get(key)
        if oldvalue is None:
            self.env[key] = value
        else:
            self.env[key] = oldvalue + ',' + value

    self.env['REQUEST_URI'] = self.path

    host = self.env.get('HTTP_HOST')
    self.port = None
    if host:
        hostname, separator, port = host.rpartition(':')
        if separator:
            # The text after the last colon is not always a port (e.g. a
            # bracketed IPv6 literal such as "[::1]", or a trailing colon);
            # in that case leave hostname/port untouched instead of failing
            # the whole request with ValueError.
            try:
                port_number = int(port)
            except ValueError:
                pass
            else:
                self.hostname = hostname
                self.port = port_number

    self._accept = self.init_q(self.headers.get('accept'))
    self._accept_encoding = self.init_q(self.headers.get('accept-encoding'))
    return True
def mark(self, contents):
    """Parse ``contents`` as an e-mail, stamp it, and return it as text.

    The parsed message is passed first to ``self.date`` and then to
    ``self.user_agent``; whatever those calls do to the message is reflected
    in the serialized string that is returned.
    """
    parsed = email.parser.Parser().parsestr(contents)
    self.date(parsed)
    self.user_agent(parsed)
    return parsed.as_string()
args = parser.parse_args() with open(args.template) as fid: template = fid.read() sender = {"mail": "Bartosz Telenczuk <*****@*****.**>", "firstname": "Bartosz"} students = csv.DictReader(open(args.emails_csv)) parser = email.parser.Parser() messages = [] email_addresses = [] for student in students: email_txt = template.format(firstname=student["firstname"]) msg = parser.parsestr(email_txt) msg["From"] = sender["mail"] msg["To"] = student["mail"] print(msg) msg.set_charset("utf-8") messages.append((sender["mail"], student["mail"], msg)) email_addresses.append(student["mail"]) r = "No" if args.send == "yes": print("\n" + "\n".join(email_addresses)) r = input('Do you really want to send messages to these recipients (if yes type "Yes")? ') r = r.lower() if r == "yes": import smtplib