Exemplo n.º 1
0
    def _pollMbox(self):
        """Drain the configured mbox file and hand its bugmails on.

        Reads the path from the 'mbox' registry value (returning
        immediately when it is unset), locks the file, parses every
        message into a bugmail.Bugmail, truncates the file to zero length
        and finally passes the collected bugmails to
        self._handleBugmails().

        Messages that are not bugmail are skipped silently; any other
        parsing error is logged and the message is dropped.  The file is
        truncated even when some messages failed to parse.
        """
        file_name = self.registryValue('mbox')
        if not file_name:
            return
        boxFile = open(file_name, 'r+b')
        try:
            # Lock inside the outer try so the handle is closed even when
            # acquiring the lock fails.
            _lock_file(boxFile)
            try:
                self.log.debug('Polling mbox %r' % boxFile)
                box = mailbox.PortableUnixMailbox(boxFile, _message_factory)
                bugmails = []
                for message in box:
                    if message == '':
                        continue
                    self.log.debug('Parsing message %s'
                                   % message['Message-ID'])
                    try:
                        bugmails.append(bugmail.Bugmail(message))
                    except bugmail.NotBugmailException:
                        continue
                    except Exception:
                        # Log and move on; a single bad message must not
                        # stop the poll.  KeyboardInterrupt/SystemExit are
                        # deliberately not caught here.
                        self.log.exception('Exception while parsing message:')
                        self.log.debug("Message:\n%s" % message.as_string())
                boxFile.truncate(0)
            finally:
                _unlock_file(boxFile)
        finally:
            boxFile.close()

        self._handleBugmails(bugmails)
Exemplo n.º 2
0
 def process_mbox(self):
     """Index every message of the mbox at self.path into self.cache.

     For each message whose subject passes self.run_rules(), a tuple
     (index, start, length, subject, date, from, message-id, in-reply-to,
     to, cc) is stored in self.cache under the id returned by
     self.get_unique_id().  The earliest and latest dates seen among the
     accepted messages are stored in self.starting / self.ending.

     The mbox file is closed when processing ends, including on error.
     """
     self.cache = {}
     starting, ending = None, None
     fp = open(self.path, 'rb')
     try:
         mb = mailbox.PortableUnixMailbox(fp)
         msg = next(mb)
         while msg is not None:
             document = msg.fp.read()
             if document is not None:
                 m = mbox_email(''.join(msg.headers))
                 d = msg.getdate('Date')
                 s = m.getSubject()
                 (result, reason) = self.run_rules(s)
                 if result:
                     if s == '':
                         s = '(no subject)'
                     from_addr = m.getFrom()
                     # Prefer the first element of the From pair, fall
                     # back to the second when it is empty.
                     f = from_addr[0]
                     if not f:
                         f = from_addr[1]
                     index = self.get_unique_id(m)
                     start, stop = get_start_stop(msg)
                     self.cache[index] = (index, start, stop - start, s, d, f,
                                          m.getMessageID(), m.getInReplyTo(),
                                          m.getTo(), m.getCC())
                     # Track the date range of accepted messages.
                     if starting is None:
                         starting = d
                     elif d < starting:
                         starting = d
                     if ending is None:
                         ending = d
                     elif d > ending:
                         ending = d
             msg = next(mb)
     finally:
         # The original never closed this handle.
         fp.close()
     self.starting, self.ending = starting, ending
Exemplo n.º 3
0
 def filedeconstructor(self, fn):
     '''Return the searchable text of the file at path fn.

     Returns an empty string when the contents contain a NUL byte
     (treated as binary).  Returns the raw contents unchanged when
     self.ui.text is set or when _msgfactory() yields nothing for the
     file.  Otherwise the file is treated as a message or mailbox: the
     strings returned by self.msgharvest() are collected and returned
     joined by newlines.

     Raises util.DeadMan when the parsed message has no Message-ID.

     NOTE(review): the original note says url items are gathered while
     harvesting; that presumably happens inside msgharvest(), which is
     not visible here.'''
     # binary check from mercurial.util
     fp = open(fn, 'rb')
     try:
         text = fp.read()
         if '\0' in text:
             return ''
         elif self.ui.text:
             return text
         # NOTE(review): fp has already been read to the end above;
         # presumably _msgfactory repositions it -- confirm.
         msg = _msgfactory(fp)
         if not msg:
             return text
         # else it's a message or a mailbox
         if not msg['message-id']:
             hint = ('make sure input is a raw message'
                     ' - in mutt: unset pipe_decode -,'
                     ' or use -t/--text to disable message detection')
             raise util.DeadMan('no message-id found', hint=hint)
         if not msg.get_unixfrom():
             # No envelope "From " line: harvest the single message.
             textlist = self.msgharvest(msg)
         else: # has an envelope line: treat text like a mailbox because it might be one
             textlist = [] # list of strings to search
             mbox = mailbox.PortableUnixMailbox(fp, _msgfactory)
             # Only messages returned by mbox.next() are harvested here;
             # the loop ends when next() returns None.
             while msg is not None:
                 msg = mbox.next()
                 if msg:
                     textlist += self.msgharvest(msg)
     finally:
         fp.close()
     return '\n'.join(textlist)
Exemplo n.º 4
0
def scan_file(filename, compress, overwrite, nospinner):
    """Gets IDs of messages in the specified mbox file.

    filename   -- path of the mbox file to scan
    compress   -- 'gzip', 'bzip2' or anything else for an uncompressed file
    overwrite  -- when true the file is going to be replaced, so nothing
                  is scanned and an empty list is returned
    nospinner  -- passed through to Spinner

    Returns an empty list when the file is skipped or missing, otherwise
    a dict mapping each parsed Message-Id to itself.  Messages with a
    missing or unparsable Message-Id produce a warning on stdout and are
    left out.
    """
    # file will be overwritten
    if overwrite:
        return []
    assert 'bzip2' != compress

    # file doesn't exist
    if not os.path.exists(filename):
        print("File %s: not found" % filename)
        return []

    spinner = Spinner("File %s" % filename, nospinner)

    # open the file
    if compress == 'gzip':
        mbox = gzip.GzipFile(filename, 'rb')
    elif compress == 'bzip2':
        mbox = bz2.BZ2File(filename, 'rb')
    else:
        mbox = open(filename, 'rb')

    messages = {}

    try:
        # each message
        for i, message in enumerate(mailbox.PortableUnixMailbox(mbox)):
            header = ''
            # We assume all messages on disk have message-ids
            try:
                header = ''.join(message.getfirstmatchingheader('message-id'))
            except KeyError:
                # No message ID was found. Warn the user and move on
                print("")
                print("WARNING: Message #%d in %s has no Message-Id header."
                      % (i, filename))

            header = BLANKS_RE.sub(' ', header.strip())
            match = MSGID_RE.match(header)
            if match is None:
                # Message-Id was found but could somehow not be parsed by
                # regexp (highly bloody unlikely)
                print("")
                print("WARNING: Message #%d in %s has a malformed Message-Id "
                      "header." % (i, filename))
            else:
                # Key and value are the same, so re-assigning a duplicate
                # is harmless and avoids a linear keys() scan per message.
                msg_id = match.group(1)
                messages[msg_id] = msg_id
            spinner.spin()
    finally:
        # Close the handle even when parsing blows up.
        mbox.close()

    # done
    spinner.stop()
    print(": %d messages" % len(messages))
    return messages
Exemplo n.º 5
0
def mbox_train(h, path, is_spam, force):
    """Train bayes with a Unix mbox.

    Every message in the mbox at path is passed to
    msg_train(h, msg, is_spam, force).  When the
    ("Headers", "include_trained") option is set, the messages are
    written to a temporary file as they are processed and the mbox is
    then rewritten from it.

    The mbox is held under an exclusive flock for the duration; the lock,
    the mbox handle and the temporary file are released on every exit
    path, including the early return on a malformed message.
    """

    if loud:
        print("  Reading as Unix mbox")

    import mailbox
    import fcntl

    # Open and lock the mailbox.  Some systems require it be opened for
    # writes in order to assert an exclusive lock.
    f = open(path, "r+b")
    try:
        fcntl.flock(f, fcntl.LOCK_EX)
        outf = os.tmpfile()
        try:
            mbox = mailbox.PortableUnixMailbox(f, get_message)
            counter = 0
            trained = 0

            for msg in mbox:
                if not msg:
                    print("Malformed message number %d.  I can't train on this mbox, sorry." % counter)
                    return
                counter += 1
                if loud and counter % 10 == 0:
                    sys.stdout.write("\r%6d" % counter)
                    sys.stdout.flush()
                if msg_train(h, msg, is_spam, force):
                    trained += 1
                if options["Headers", "include_trained"]:
                    # Write it out with the Unix "From " line
                    outf.write(mboxutils.as_string(msg, True))

            if options["Headers", "include_trained"]:
                outf.seek(0)
                try:
                    os.ftruncate(f.fileno(), 0)
                    f.seek(0)
                except:
                    # If anything goes wrong, don't try to write
                    print("Problem truncating mbox--nothing written")
                    raise
                try:
                    for line in outf:
                        f.write(line)
                    # Push the data out while the lock is still held.
                    f.flush()
                except:
                    # The original called sys.stderr like a function here,
                    # which raised TypeError and hid the real error.
                    sys.stderr.write("Problem writing mbox!  Sorry, "
                                     "I tried my best, but your mail "
                                     "may be corrupted.\n")
                    raise
        finally:
            outf.close()
            fcntl.flock(f, fcntl.LOCK_UN)
    finally:
        f.close()

    if loud:
        sys.stdout.write("\r%6d" % counter)
        sys.stdout.write("\r  Trained %d out of %d messages\n" %
                         (trained, counter))
Exemplo n.º 6
0
def main(mailbox_path):
    """Print each distinct To: address found in an mbox, sorted.

    mailbox_path -- path of the mbox file to read.  The file is closed
    once all messages have been scanned, including on error.
    """
    addresses = set()
    fp = open(mailbox_path)
    try:
        for msg in mailbox.PortableUnixMailbox(fp):
            # getaddr() returns a (name, address) pair; keep the address.
            addresses.add(msg.getaddr('To')[1])
    finally:
        fp.close()
    for address in sorted(addresses):
        print(address)
 def test_unix_mbox(self):
     """A file created via createMessage("cur", True) reads back as a
     one-message PortableUnixMailbox with the expected subject and size."""
     ### should be better!
     import email.Parser
     mbox_path = self.createMessage("cur", True)
     box = mailbox.PortableUnixMailbox(open(mbox_path),
                                       email.Parser.Parser().parse)
     seen = 0
     for seen, parsed in enumerate(box, 1):
         self.assertEqual(parsed["subject"], "Simple Test")
         expected_size = len(FROM_) + len(DUMMY_MESSAGE)
         self.assertEqual(len(str(parsed)), expected_size)
     self.assertEqual(seen, 1)
Exemplo n.º 8
0
def count(fname):
    """Count messages in an mbox by whether they carry a recipient header.

    A message is "bad" when both its To and Cc headers are missing and
    "good" otherwise.

    Returns the pair (goodcount, badcount).  The file is closed even when
    parsing raises.
    """
    goodcount = 0
    badcount = 0
    fp = open(fname, 'rb')
    try:
        for msg in mailbox.PortableUnixMailbox(fp, get_message):
            if msg["to"] is None and msg["cc"] is None:
                badcount += 1
            else:
                goodcount += 1
    finally:
        fp.close()
    return goodcount, badcount
def main(filename, attachment_dir):
    emails = load_results()
    num = 0
    with open(filename, 'rb') as fp:
        mb = mailbox.PortableUnixMailbox(fp, factory=email.message_from_file)
        if not os.path.exists(attachment_dir):
            os.mkdir(attachment_dir)
        for message in mb:
            num += 1

            if num - 1 in emails:
                if num % 1000 == 0:
                    print "skipping", num
                continue
            attachments = return_attachments(message)
            # We only want to store those emails that have attachments
            if attachments:
                saved_attachments = []
                for a in attachments:
                    content_hash = hashlib.sha256(a['content']).hexdigest()
                    new_location = attachment_dir + '/' + content_hash
                    if not os.path.exists(new_location):
                        with open(new_location, 'wb') as f:
                            f.write(a['content'])
                    saved_attachments.append({
                        'filename':
                        convert_to_unicode(a['filename']),
                        'hash':
                        content_hash
                    })

                if message['date']:
                    date = convert_to_unicode(message['date'])
                else:
                    date = None
                subject = convert_to_unicode(message['subject'])
                froms = map(convert_to_unicode,
                            message.get_all('from', 'ignore'))
                tos = map(convert_to_unicode, message.get_all('to', 'ignore'))

                emails[num - 1] = {
                    'attachments': saved_attachments,
                    'date': date,
                    'subject': subject,
                    'froms': froms,
                    'tos': tos
                }
            if num % 1000 == 0:
                print num
                save_results(emails)
    print num
Exemplo n.º 10
0
    def getFileMessageIds(self, filename):
        """Gets IDs of messages in the specified mbox file.

        The stream is obtained through self.output.getStream(); its first
        underlying file is scanned as a PortableUnixMailbox.

        Returns an empty list when the stream cannot be opened (IOError)
        or holds no file, otherwise a dict mapping each parsed Message-Id
        to itself.  Messages with a missing or unparsable Message-Id are
        logged as warnings and left out.  The file is closed even when
        scanning raises.
        """

        try:
            mbox = self.output.getStream(filename, mode='r')
            mbox = mbox.files[0]

            if not mbox:
                # no valid file exists
                return []
        except IOError:
            # file does not exist
            return []

        messages = {}

        try:
            # each message
            for i, message in enumerate(mailbox.PortableUnixMailbox(mbox)):
                header = ''
                # We assume all messages on disk have message-ids
                try:
                    header = ''.join(
                        message.getfirstmatchingheader('message-id'))
                except KeyError:
                    # No message ID was found. Warn the user and move on
                    self.logger.warn(
                        "Message {id} in {file} has no Message-Id header".format(
                            id=i, file=filename))

                header = self.BLANKS_RE.sub(' ', header.strip())
                match = self.MSGID_RE.match(header)
                if match is None:
                    # Message-Id was found but could somehow not be parsed
                    # by regexp (highly bloody unlikely)
                    self.logger.warn(
                        'Message {id} in {file} has a malformed Message-Id header.'
                        .format(id=i, file=filename))
                else:
                    # Key equals value, so overwriting a duplicate is a
                    # no-op and avoids a linear keys() scan per message.
                    msg_id = match.group(1)
                    messages[msg_id] = msg_id
        finally:
            mbox.close()

        return messages
Exemplo n.º 11
0
 def boxparser(self, path, maildir=False, isspool=False):
     """Open the mailbox at path for parsing.

     path     -- mailbox file, or Maildir directory when maildir is true
     maildir  -- treat path as a Maildir directory
     isspool  -- true when path is being processed as the mail spool

     Returns None without opening anything when the path is filtered
     out, is not a complete Maildir, or cannot be opened.

     NOTE(review): the visible body ends right after the mailbox object
     is created; the remainder of this method appears to be missing from
     this excerpt.
     """
     # Skip the spool path unless it is explicitly processed as the
     # spool, and skip any path matching the configured mask.
     if (not isspool and path == self.mspool or
         self.ui.mask and self.ui.mask.search(path) is not None):
         return
     if maildir:
         try:
             dl = os.listdir(path)
         except OSError:
             return
         # Require all three standard Maildir subdirectories.
         for d in 'cur', 'new', 'tmp':
             if d not in dl:
                 return
         mbox = mailbox.Maildir(path, _msgfactory)
     else:
         try:
             fp = open(path, 'rb')
         except IOError, inst:
             # Report the failure through the ui and give up on this path.
             self.ui.warn('%s\n' % inst)
             return
         mbox = mailbox.PortableUnixMailbox(fp, _msgfactory)
Exemplo n.º 12
0
def load_from_file():
    """Import the mbox named by the `infile` option in batches.

    Reads its settings from tornado.options.options:
      init       -- delete the index before (re)creating it
      skip       -- number of leading messages to ignore
      infile     -- path of the mbox file
      batch_size -- number of converted items per upload_batch() call

    Each message is converted with convert_msg_to_json(); falsy results
    are dropped.  Items are uploaded whenever a full batch has
    accumulated, and any remainder is uploaded at the end.
    """

    opts = tornado.options.options

    if opts.init:
        delete_index()
    create_index()

    if opts.skip:
        logging.info("Skipping first %d messages from mbox file" %
                     opts.skip)
    skip = opts.skip or 0

    count = 0
    upload_data = list()
    logging.info("Starting import from file %s" % opts.infile)
    infile = open(opts.infile, 'rb')
    try:
        mbox = mailbox.PortableUnixMailbox(infile, email.message_from_file)

        # NOTE(review): not used below; kept in case construction matters.
        emailParser = DelegatingEmailParser(
            [AmazonEmailParser(), SteamEmailParser()])

        for msg in mbox:
            count += 1

            if not count % 100:
                logging.info("Item %d" % count)

            # count is 1-based, so `<=` skips exactly `skip` messages;
            # the previous `<` skipped one message too few.
            if count <= skip:
                continue
            item = convert_msg_to_json(msg)
            if item:
                upload_data.append(item)
                if len(upload_data) == opts.batch_size:
                    upload_batch(upload_data)
                    upload_data = list()
    finally:
        infile.close()

    # upload remaining items in `upload_data`
    if upload_data:
        upload_batch(upload_data)

    logging.info("Import done - total count %d" % count)
Exemplo n.º 13
0
 def _selectBox(self):
     """Open self.mailbox with the reader matching the selected box type.

     The selected type (self.boxtype) is compared against the entries of
     self.boxtyps in order.  On a match the mailbox object is stored in
     self.mb and a success line is displayed; otherwise an error line is
     displayed and self.running is set to 2.
     """
     # (index into self.boxtyps, mailbox class name).  None marks
     # Maildir, which is opened on the containing directory rather than
     # on a file object.
     layouts = (
         (0, 'UnixMailbox'),          # mBox Strict
         (1, 'PortableUnixMailbox'),  # mBox Loose
         (2, None),                   # MailDir
         (3, 'MmdfMailbox'),          # MMDF
         (4, 'MHMailbox'),            # MH
         (5, 'BabylMailbox'),         # Babyl
     )
     for slot, class_name in layouts:
         if self.boxtype.get() != self.boxtyps[slot]:
             continue
         if class_name is None:
             self.mb = mailbox.Maildir(os.path.dirname(self.mailbox.get()))
             self.Disp(self.boxtype.get(), " at location ",
                       os.path.dirname(self.mailbox.get()),
                       " Opened Successfully.")
         else:
             opener = getattr(mailbox, class_name)
             self.mb = opener(file(self.mailbox.get(), 'r'))
             self.Disp(self.boxtype.get(), " at location ",
                       self.mailbox.get(), " Opened Successfully.")
         return
     # Unknown File Type
     self.Disp("*** I don't know about that file type.")
     self.running = 2
Exemplo n.º 14
0
def process_mailbox(f, dosa=1, pats=None):
    """Run process_message() over each message of the mbox file object f
    and write each message to stdout, envelope line included."""
    writer = email.Generator.Generator(sys.stdout, maxheaderlen=0)
    box = mailbox.PortableUnixMailbox(f, Parser().parse)
    for message in box:
        process_message(message, dosa, pats)
        # maxheaderlen=0 above disables header re-wrapping on output.
        writer.flatten(message, unixfrom=1)
Exemplo n.º 15
0
            continue
        if save_lines:
            stripped_line = line.strip()
            if not stripped_line:
                break
            full_error_lines.append(stripped_line)
    return full_error_lines

for e in os.listdir(mail_directory):
    if not re.search(r'^\d+$', e):
        continue
    filename = os.path.join(mail_directory, e)
    with open(filename, 'rb') as fp:
        # A hack to parse Gnus's nnml format mail folders:
        raw_email = re.sub(r'^X-From-Line: ', 'From ', fp.read())
        for mail in mailbox.PortableUnixMailbox(StringIO.StringIO(raw_email)):
            dt = dateutil.parser.parse(mail['date'])
            if dt < since:
                continue
            if dt > until:
                continue
            if 'subject' not in mail:
                continue
            subject = mail['subject']
            if not re.search(r'^Mail delivery failed', subject):
                continue
            failed_recipient = mail.getheader('X-Failed-Recipients').strip()
            bounced.add(failed_recipient)
            print failed_recipient
            print >> sys.stderr, "\n".join("    " + l for l in get_errors(mail))
Exemplo n.º 16
0
#!/usr/bin/python
import email
import mailbox
import sys
# List the Subject header of every message in the mbox given on the
# command line.
if len(sys.argv) < 2:
    print("Usage: %s [path to mailbox file]" % sys.argv[0])
    # Exit status must be an int; a list would be printed and mapped to 1.
    sys.exit(1)
path = sys.argv[1]
fp = open(path, 'rb')
try:
    subjects = [message['Subject']
                for message in mailbox.PortableUnixMailbox(
                    fp, email.message_from_file)]
finally:
    fp.close()
# The count placeholder was missing its '%', which made this line raise
# "not all arguments converted".
print('%s message(s) in mailbox "%s":' % (len(subjects), path))
for subject in subjects:
    # Single formatted argument so the output is " <subject>" whether
    # print is a statement or a function.
    print(' %s' % subject)
Exemplo n.º 17
0
 def get_mailbox(self, filename):
     """Return a PortableUnixMailbox reading filename inside self.mbox_dir."""
     factory = mailbox.PortableUnixMailbox
     mbox_path = os.path.join(self.mbox_dir, filename)
     handle = open(mbox_path)
     return factory(handle)
Exemplo n.º 18
0
#!/usr/bin/python
import email
import mailbox
import os
import sys

# Print every message of the mbox given as argv[1], with the top-level
# parts whose content type starts with argv[2] removed from multipart
# messages.
if len(sys.argv) < 3:
    print("%s <{mailbox}> <mime-type>" % sys.argv[0])
    sys.exit(-1)

fp = open(sys.argv[1], 'rb')
try:
    mbox = mailbox.PortableUnixMailbox(fp, email.message_from_file)

    for i in mbox:
        if i.is_multipart():
            l = i.get_payload()
            # Rebuild the payload list in place.  Removing entries while
            # iterating the same list skipped the part after each removal.
            l[:] = [ii for ii in l
                    if not ii.get_content_type().startswith(sys.argv[2])]
        print(i)
finally:
    fp.close()
Exemplo n.º 19
0
eudoraDir = r"t:\data\luc\eudora"


def getAttrOrNone(msg, name):
    """Return msg[name] when name is present in msg, otherwise None.

    Uses the `in` operator instead of the deprecated has_key() method.
    """
    if name in msg:
        return msg[name]
    return None


if __name__ == "__main__":

    # Dump date/from/to/subject for every message of every *.mbx file in
    # eudoraDir, followed by a per-mailbox message count.
    for fn in os.listdir(eudoraDir):
        (root, ext) = os.path.splitext(fn)
        if ext != '.mbx':
            continue
        pfn = os.path.join(eudoraDir, fn)
        print("\nfound mailbox %s\n" % pfn)
        f = open(pfn)
        try:
            # The factory must be a callable taking the message file;
            # email.Message is a module and cannot be called.
            mb = mailbox.PortableUnixMailbox(f, email.message_from_file)
            count = 0
            for msg in mb:
                print("%s %s %s %s" % (getAttrOrNone(msg, 'date'),
                                       getAttrOrNone(msg, 'from'),
                                       getAttrOrNone(msg, 'to'),
                                       getAttrOrNone(msg, 'subject')))
                count += 1
        finally:
            f.close()
        print("\n%s contains %d messages\n" % (fn, count))
Exemplo n.º 20
0
        if not (0 < percent < 100):
            raise ValueError
        percent /= 100.0
        bin1 = args[2]
        bin2 = args[3]
    except IndexError:
        usage(1, 'Not enough arguments')
    except ValueError:
        usage(1, 'Percent argument must be a float between 1.0 and 99.0')

    # Cruise
    bin1out = open(bin1, 'wb')
    bin2out = open(bin2, 'wb')
    infp = open(mboxfile, 'rb')

    mbox = mailbox.PortableUnixMailbox(infp, mboxutils.get_message)
    for msg in mbox:
        if random.random() < percent:
            outfp = bin1out
        else:
            outfp = bin2out
        astext = str(msg)
        assert astext.endswith('\n')
        outfp.write(astext)

    outfp.close()
    bin1out.close()
    bin2out.close()


if __name__ == '__main__':
Exemplo n.º 21
0
def getmbox(name):
    """Return an mbox iterator given a file/directory/folder name.

    Accepted forms of name:
      "-"                   a single message read from stdin (returned as
                            a one-element list)
      "+folder[,folder...]" MH folders; "+ALL" selects every MH folder
      ":user:pwd@server[:port]:folder[,folder...]"
                            IMAP folders; "ALL" as the folder part selects
                            every folder reported by the server
      a directory           Maildir if it has a "cur" entry, an MH mailbox
                            if the path contains "/Mail/", otherwise a
                            DirOfTxtFileMailbox
      anything else         a Unix mbox file

    An IMAP name that does not match the expected pattern raises
    AttributeError (the pattern match returns None).
    """

    if name == "-":
        return [get_message(sys.stdin)]

    if name.startswith("+"):
        # MH folder name: +folder, +f1,f2,f2, or +ALL
        name = name[1:]
        import mhlib
        mh = mhlib.MH()
        if name == "ALL":
            names = mh.listfolders()
        elif ',' in name:
            names = name.split(',')
        else:
            names = [name]
        mhpath = mh.getpath()
        mboxes = [mailbox.MHMailbox(os.path.join(mhpath, folder), get_message)
                  for folder in names]
        if len(mboxes) == 1:
            return iter(mboxes[0])
        else:
            return _cat(mboxes)

    elif name.startswith(":"):
        # IMAP mailbox name:
        #   :username:password@server:folder1,...folderN
        #   :username:password@server:port:folder1,...folderN
        #   :username:password@server:ALL
        #   :username:password@server:port:ALL
        parts = re.compile(
            ':(?P<user>[^@:]+):(?P<pwd>[^@]+)@(?P<server>[^:]+(:[0-9]+)?):(?P<name>[^:]+)'
        ).match(name).groupdict()

        from scripts.sb_imapfilter import IMAPSession, IMAPFolder
        from spambayes import Stats, message
        from spambayes.Options import options

        session = IMAPSession(parts['server'])
        session.login(parts['user'], parts['pwd'])
        folder_list = session.folder_list()

        # Test the parsed folder part: `name` itself always starts with
        # ":" in this branch and therefore could never equal "ALL".
        if parts['name'] == "ALL":
            names = folder_list
        else:
            names = parts['name'].split(',')

        message_db = message.Message().message_info_db
        stats = Stats.Stats(options, message_db)
        mboxes = [IMAPFolder(n, session, stats) for n in names]

        if len(mboxes) == 1:
            return full_messages(mboxes[0])
        else:
            return _cat([full_messages(x) for x in mboxes])

    if os.path.isdir(name):
        # XXX Bogus: use a Maildir if /cur is a subdirectory, else a MHMailbox
        # if the pathname contains /Mail/, else a DirOfTxtFileMailbox.
        if os.path.exists(os.path.join(name, 'cur')):
            mbox = mailbox.Maildir(name, get_message)
        elif name.find("/Mail/") >= 0:
            mbox = mailbox.MHMailbox(name, get_message)
        else:
            mbox = DirOfTxtFileMailbox(name, get_message)
    else:
        fp = open(name, "rb")
        mbox = mailbox.PortableUnixMailbox(fp, get_message)
    return iter(mbox)
Exemplo n.º 22
0
 def get_mailbox(self, filename):
     """Return a PortableUnixMailbox for the file named by appending
     filename directly to self.mbox_dir (no separator is inserted)."""
     factory = mailbox.PortableUnixMailbox
     mbox_path = self.mbox_dir + filename
     return factory(open(mbox_path))