示例#1
0
    def test_from_regex (self):
        # Testing new regex from bug #1633678
        f = open(self._path, 'w')
        f.write("""From [email protected] Mon May 31 13:24:50 2004 +0200
Subject: message 1

body1
From [email protected] Mon May 31 13:24:50 2004 -0200
Subject: message 2

body2
From [email protected] Mon May 31 13:24:50 2004
Subject: message 3

body3
From [email protected] Mon May 31 13:24:50 2004
Subject: message 4

body4
""")
        f.close()
        box = mailbox.UnixMailbox(open(self._path, 'r'))
        self.assert_(len(list(iter(box))) == 4)
示例#2
0
def main():
    """Read the ``mbox`` file, thread its messages and print every thread.

    Each message in the file named ``mbox`` (current directory) is converted
    with ``make_message``, the resulting list is handed to ``thread``, and
    the containers of the returned subject table are printed in sorted key
    order via ``print_container``.
    """
    import mailbox

    print('Reading input file...')
    # The ``with`` block closes the input file even if parsing fails.
    with open("mbox", 'rb') as f:
        mbox = mailbox.UnixMailbox(f)
        msglist = []
        while True:
            msg = mbox.next()
            if msg is None:
                # next() signals the end of the mailbox with None.
                break
            msglist.append(make_message(msg))

    print('Threading...')
    subject_table = thread(msglist)

    # Output
    # sorted() works whether items() returns a list or a view object,
    # unlike calling .sort() on the result.
    for subj, container in sorted(subject_table.items()):
        print_container(container)
示例#3
0
 def _selectBox(self):
     # mBox Strict
     if self.boxtype.get() == self.boxtyps[0]:
         self.mb = mailbox.UnixMailbox(file(self.mailbox.get(), 'r'))
         self.Disp(self.boxtype.get(), " at location ", self.mailbox.get(),
                   " Opened Successfully.")
     # mBox Loose
     elif self.boxtype.get() == self.boxtyps[1]:
         self.mb = mailbox.PortableUnixMailbox(file(self.mailbox.get(),
                                                    'r'))
         self.Disp(self.boxtype.get(), " at location ", self.mailbox.get(),
                   " Opened Successfully.")
     # MailDir
     elif self.boxtype.get() == self.boxtyps[2]:
         self.mb = mailbox.Maildir(os.path.dirname(self.mailbox.get()))
         self.Disp(self.boxtype.get(), " at location ",
                   os.path.dirname(self.mailbox.get()),
                   " Opened Successfully.")
     # MMDF
     elif self.boxtype.get() == self.boxtyps[3]:
         self.mb = mailbox.MmdfMailbox(file(self.mailbox.get(), 'r'))
         self.Disp(self.boxtype.get(), " at location ", self.mailbox.get(),
                   " Opened Successfully.")
     # MH
     elif self.boxtype.get() == self.boxtyps[4]:
         self.mb = mailbox.MHMailbox(file(self.mailbox.get(), 'r'))
         self.Disp(self.boxtype.get(), " at location ", self.mailbox.get(),
                   " Opened Successfully.")
     # Babyl
     elif self.boxtype.get() == self.boxtyps[5]:
         self.mb = mailbox.BabylMailbox(file(self.mailbox.get(), 'r'))
         self.Disp(self.boxtype.get(), " at location ", self.mailbox.get(),
                   " Opened Successfully.")
     #Unknown File Type
     else:
         self.Disp("*** I don't know about that file type.")
         self.running = 2
def load_from_file():
    """Import the messages of the configured mbox file in batches.

    Behaviour is driven by ``tornado.options.options``:

    * ``init`` -- when true, the index is deleted before being (re)created.
    * ``skip`` -- number of leading messages to ignore.
    * ``infile`` -- path of the mbox file to read.
    * ``batch_size`` -- number of converted messages sent per upload.

    Every message that is not skipped is converted with
    ``convert_msg_to_json``; falsy results are dropped. The total logged at
    the end counts every message read, including skipped ones.
    """
    options = tornado.options.options

    if options.init:
        delete_index()
    create_index()

    # Treat an unset skip option as "skip nothing".
    skip = options.skip or 0
    if skip:
        logging.info("Skipping first %d messages from mbox file", skip)

    count = 0
    upload_data = []
    logging.info("Starting import from file %s", options.infile)

    # The ``with`` block closes the input file once the import finishes or
    # fails.
    with open(options.infile, 'rb') as infile:
        mbox = mailbox.UnixMailbox(infile, email.message_from_file)

        # NOTE(review): this parser is built but never used in this
        # function; kept in case construction matters -- confirm and remove.
        emailParser = DelegatingEmailParser(
            [AmazonEmailParser(), SteamEmailParser()])

        for msg in mbox:
            count += 1
            # ``count`` is 1-based here, so ``<=`` skips exactly ``skip``
            # messages (``<`` skipped one too few).
            if count <= skip:
                continue
            item = convert_msg_to_json(msg)
            if item:
                upload_data.append(item)
                if len(upload_data) == options.batch_size:
                    upload_batch(upload_data)
                    upload_data = []

    # upload remaining items in `upload_data`
    if upload_data:
        upload_batch(upload_data)

    logging.info("Import done - total count %d", count)
示例#5
0
    def setupUpdatesMethod(self, numUpdates):
        """Build the message table used by the updates test.

        Messages are read from the primary mailbox until ``numUpdates`` of
        them carrying a Message-ID have been collected, or until the mailbox
        yields a false value. The result maps each Message-ID to the
        ``testMessage`` built from that message.
        """
        updates = {}
        collected = 0
        box = mailbox.UnixMailbox(open(mbox, "r"))

        while True:
            msg = box.next()
            if not (msg and collected < numUpdates):
                break

            # A wrapper is built for every message read, including those
            # that end up unused because they carry no Message-ID.
            wrapped = testMessage(msg)

            message_id = msg.dict.get("message-id", None)
            if message_id:
                updates[message_id] = wrapped
                collected = collected + 1

        return updates
示例#6
0
#!/usr/bin/env python
import mailbox  # read various mailbox formats (incl. Unix format)
import smtplib  # tools for sending mail
import string

# Walk through the messages stored in "testbox" and, for each one, collect
# the recipient list and start building a follow-up message text.
# NOTE(review): the rest of the loop body is not visible here; presumably the
# message is then sent with smtplib -- confirm against the full script.
# open a part of my outgoing mailbox, for resending:
mbox = mailbox.UnixMailbox(open("testbox", "r"))
while 1:
    msg = mbox.next()  # grab next message
    # stop as soon as next() yields a false value (no more messages)
    if not msg: break
    # can extract mail header fields like a dictionary,
    # e.g. msg['To'], msg['From'], msg['cc'], msg['date'] etc.
    # msg.keys() lists all keys for this message
    # msg.fp.read() gets the body of the message
    # str(msg) is the header of the message

    # send message to msg['To'], those on the cc list, and myself:
    to = [msg['To']]
    if msg.has_key('cc'):
        # the cc header is split on commas and each address is stripped
        to += map(string.strip, msg['cc'].split(','))
    to.append(msg['From'])  # add myself

    message = str(msg) + """

Due to an error with my email connection, the email I sent
you on %s may not have reached you.
A copy of the message is inserted below.
I apologize if you end up with multiple copies of this message.


===============================================================================
示例#7
0
        json_part = {}
        if part.get_content_maintype() == 'multipart':
            continue

        json_part['contentType'] = part.get_content_type()
        content = part.get_payload(decode=False).decode('utf-8', 'ignore')
        json_part['content'] = cleanContent(content)

        json_msg['parts'].append(json_part)

    # Finally, convert date from asctime to milliseconds since epoch using the
    # $date descriptor so it imports "natively" as an ISODate object in MongoDB
    then = parse(json_msg['Date'])
    millis = int(
        time.mktime(then.timetuple()) * 1000 + then.microsecond / 1000)
    json_msg['Date'] = {'$date': millis}

    return json_msg


# Parse the mbox file into email message objects.
mbox = mailbox.UnixMailbox(open(MBOX, 'rb'), email.message_from_file)

# Write each message out as a JSON object on a separate line
# for easy import into MongoDB via mongoimport

# The ``with`` block flushes and closes the output file even when
# conversion of a message raises part-way through.
with open(OUT_FILE, 'w') as f:
    for msg in gen_json_msgs(mbox):
        # gen_json_msgs may yield None for a message; those are skipped.
        if msg is not None:
            f.write(json.dumps(msg, cls=Encoder) + '\n')
示例#8
0
#!/usr/bin/python

# This program removes duplicated messages from a mailbox
import mailbox

# Copy the mailbox to a new file, leaving out every message whose Message-ID
# equals that of the message written immediately before it. Both files are
# closed (and the output flushed) when the block exits, even on error.
with open('/var/spool/mail/gregb') as fp:
    mb = mailbox.UnixMailbox(fp)

    with open('/tmp/mailbox-gregb', 'w') as outfp:
        # Message-ID of the last message written, or None when that message
        # had no Message-ID (or nothing has been written yet).
        prev = None
        while True:
            msg = mb.next()
            if msg is None:
                break

            try:
                msgid = msg['Message-ID']
            except KeyError:
                # Message without a Message-ID header: never a duplicate.
                msgid = None

            if msgid is not None and prev == msgid:
                print("Skipping duplicate message %s" % (msgid,))
                continue

            # Rewind to the start of the headers so the whole message is
            # copied, not only the body.
            msg.fp.seek(msg.startofheaders)
            outfp.write(msg.fp.read())
            print("Wrote %s" % (msgid,))
            # Remember what was just written; without this update no
            # duplicate could ever be detected.
            prev = msgid
示例#9
0
# Script entry point: parse a Unix mailbox and build a directed multigraph
# from its messages, using each sender and the message's recipients.
# NOTE(review): the body of the innermost loop is not visible here.
if __name__ == '__main__':

    import networkx as nx
    # matplotlib is optional: any failure to import it is silently ignored
    try: 
        import matplotlib.pyplot as plt
    except:
        pass

    # mailbox path: first command-line argument, else a default file name
    # (note that the name `file` shadows the Python 2 builtin)
    if len(sys.argv)==1:
        file="unix_email.mbox"
    else:
        file=sys.argv[1]
    fp=open(file,"r")

    mbox = mailbox.UnixMailbox(fp, msgfactory) # parse unix mailbox

    G=nx.MultiDiGraph() # create empty graph

    # parse each message and build the graph
    for msg in mbox: # msg is python email.Message.Message object
        (source_name,source_addr) = parseaddr(msg['From']) # sender
        # get all recipients
        # see http://www.python.org/doc/current/lib/module-email.Utils.html
        tos = msg.get_all('to', [])
        ccs = msg.get_all('cc', [])
        resent_tos = msg.get_all('resent-to', [])
        resent_ccs = msg.get_all('resent-cc', [])
        # flatten all recipient headers into (name, address) pairs
        all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
        # now add the edges for this mail message
        for (target_name,target_addr) in all_recipients:
示例#10
0
def inc():
    """Run the incremental mail-loading benchmark across worker threads.

    Arguments are taken from ``sys.argv``:

    * ``argv[2]`` -- path of the mbox file to read.
    * ``argv[3]``, ``argv[4]`` -- lower and upper message bounds.
    * ``argv[5]`` -- optional thread count (default 1).
    * ``argv[6]`` -- optional wait value (default 0.25). It is only read
      when a thread count was given and is doubled before use; without a
      thread count the wait is 0.

    The message range is divided evenly between the threads. Each thread
    gets its own destination (``maili<start>``), its own open mailbox
    positioned at its starting message, and a lock that its callback
    releases. After all locks have been re-acquired, the elapsed wall time,
    CPU time, database size delta and ``VmSize()`` delta are printed and the
    database is closed.
    """
    import Zope2
    import thread

    first, last = atoi(sys.argv[3]), atoi(sys.argv[4])
    count = last - first
    try:
        threads = atoi(sys.argv[5])
    except (IndexError, ValueError):
        # Argument missing or not a number: single-threaded, no waiting.
        threads = 1
        wait = 0
    else:
        try:
            wait = atof(sys.argv[6])
        except (IndexError, ValueError):
            wait = 0.25
        wait = wait * 2

    # Messages handled per thread (explicit floor division).
    count = count // threads

    db = Zope2.DB

    # Baselines for the deltas reported at the end.
    size = db.getSize()
    mem = VmSize()
    t = time.time()
    c = time.clock()

    mbox = sys.argv[2]
    argss = []
    for i in range(threads):
        amin = first + i * count
        dest = 'maili%s' % amin
        initmaili(dest)
        f = open(mbox)
        mb = mailbox.UnixMailbox(f)
        # Advance this thread's mailbox past the first ``amin`` messages.
        for _ in range(amin):
            mb.next()
        lock = thread.allocate_lock()
        lock.acquire()

        # ``lock=lock`` binds this iteration's lock as a default so each
        # callback releases its own lock, not the last one created.
        def returnf(t, c, size, mem, r, lock=lock):
            print("%s %s" % (c, r))
            lock.release()

        argss.append((lock, (dest, mb, f, count, wait), returnf))

    for lock, args, returnf in argss:
        thread.start_new_thread(do, (Zope2.DB, loadinc, args, returnf))

    # Every lock is released by its callback, so re-acquiring each one
    # blocks until all callbacks have run.
    for lock, args, returnf in argss:
        lock.acquire()

    t = time.time() - t
    c = time.clock() - c
    size = db.getSize() - size
    mem = VmSize() - mem

    print("%s %s %s %s" % (t, c, size, mem))

    Zope2.DB.close()
示例#11
0
    # `previous` is the timestamp that archive modification times are
    # compared against below; nothing has been processed yet.
    # NOTE(review): `latest` is not updated in the visible part of this
    # function -- presumably that happens further down; confirm.
    latest = previous
    last_processed = None

    # process updated mbox files
    for file in glob(archive):
        # skip archives whose modification time (in whole seconds) is not
        # newer than `previous`
        if int(previous) >= int(os.stat(file).st_mtime): continue

        # open gzipped/raw file
        if file.endswith('.gz'):
            fh = gzip.open(file)
        else:
            fh = open(file)

        # process each multipart message in the mailbox
        for msg in iter(mailbox.UnixMailbox(fh, email.message_from_file)):
            # remember the Date header of the most recent message read
            last_processed = msg['Date']

            if msg.is_multipart():
                detach(msg)
            # single-part message whose payload contains this literal string
            elif '919-573-9199' in msg.get_payload():
                # only when the PGP signature marker is a complete line of
                # the payload
                if '-----BEGIN PGP SIGNATURE-----' in msg.get_payload().split(
                        "\n"):
                    # mark the message as an attachment named pgp.txt, wrap
                    # it in a new container message that copies its
                    # headers, and hand the container to detach()
                    msg.add_header('Content-Disposition',
                                   'attachment',
                                   filename='pgp.txt')
                    wrapper = email.message.Message()
                    wrapper.attach(msg)
                    for header in msg.keys():
                        wrapper[header] = msg[header]
                    detach(wrapper)
示例#12
0
import mailbox

# Print every header and the body size of each message in the spool file.
# The ``with`` block closes the spool file when the loop ends or fails.
with open("/var/spool/mail/effbot") as spool:
    mb = mailbox.UnixMailbox(spool)

    while True:
        msg = next(mb)
        if not msg:
            # a false value marks the end of the mailbox
            break
        for k, v in msg.items():
            print(k, "=", v)
        # msg.fp is positioned after the headers, so this reads the body
        body = msg.fp.read()
        print(len(body), "bytes in body")

## subject = for he's a ...
## message-id = <*****@*****.**>
## received = (from [email protected])
##  by spam.egg (8.8.7/8.8.5) id CAA03202
##  for effbot; Fri, 15 Oct 1999 02:27:36 +0200
## from = Fredrik Lundh <*****@*****.**>
## date = Fri, 15 Oct 1999 12:35:36 +0200
## to = [email protected]
## 1295 bytes in body
示例#13
0
'''
The mailbox module

The mailbox module handles a variety of mailbox formats.

Most mailbox formats store plain RFC 822 messages in a text file and use
separator lines to tell the individual messages apart.
'''
import mailbox

# show the module's built-in documentation
help(mailbox)
# NOTE(review): open('') is given an empty path, which fails at runtime --
# presumably a placeholder for a real mailbox file path; confirm.
mb = mailbox.UnixMailbox(open(''))









示例#14
0
	def __init__(self, reader):
		"""Read every message from the local spool file into the database.

		The spool file named by the preferences is locked with
		``lockfile-create``. Each message is then loaded and saved, and
		the spool is truncated afterwards when the "delete remote"
		preference is set. The file is closed and the lock removed on
		every path out of the method once the lock was obtained,
		including errors raised while reading.

		reader -- passed through unchanged to the Fetcher base class.
		"""
		sqmail.gui.fetcher.Fetcher.__init__(self, reader, "Spool Read")

		filename = sqmail.preferences.get_incomingpath()
		self.msg("Using spool file "+filename)
		self.msg("Locking spool file")
		# NOTE(review): filename is interpolated into a shell command
		# unquoted; paths with spaces or shell metacharacters will break.
		rv = os.system("lockfile-create --retry 1 "+filename)
		if rv:
			self.msg("Failed to lock spool file, aborting")
			self.do_abort()
			return

		self.msg("Opening spool file")

		fp = None
		try:
			fp = open(filename, "r+")
			# Seek to the end to learn the file size, which is the
			# total used for progress reporting, then rewind.
			fp.seek(0, 2)
			size = fp.tell()
			fp.seek(0, 0)

			empty = (size == 0)
			if empty:
				self.msg("Spool file empty. Aborting.")
			else:
				mbox = mailbox.UnixMailbox(fp)
				count = 0

				self.msg("Reading messages")
				while 1:
					self.progress(fp.tell(), size)
					msg = sqmail.message.Message()
					mboxmsg = mbox.next()
					if not mboxmsg:
						break

					msg.loadfrommessage(mboxmsg)
					msg.savealltodatabase()
					count = count + 1

					if self.abort:
						self.msg("Aborted!")
						self.msg("(Duplicate messages remain in spool file.)")
						break

				self.msg(str(count)+" message(s) read")

				if not self.abort:
					if sqmail.preferences.get_deleteremote():
						self.msg("All messages read; truncating spool file")
						fp.truncate(0)
					else:
						self.msg("All messages read. Leaving mail in spool file. "
							"(Fetching again will result in duplicate messages "
							"in your database.)")

				self.msg("Closing and unlocking spool file")
		finally:
			# Always release the file and the lock; a stale lock would
			# make the next lockfile-create attempt fail.
			try:
				if fp is not None:
					fp.close()
			finally:
				os.system("lockfile-remove "+filename)

		# An empty spool always signals do_abort(); otherwise it is only
		# called when the run was not aborted.
		if empty or not self.abort:
			self.do_abort()