Exemplo n.º 1
0
    def command(self, search=None):
        """Build a social graph (nodes + weighted links) from a search.

        Nodes are unique e-mail addresses seen in the From/To/Cc/Bcc
        headers of the result set; links count how many times a sender
        wrote to each recipient.

        NOTE(review): this snippet looks truncated -- `res`, `nodes` and
        `links` are built but never returned in the visible code.
        """
        session, idx, start = self._do_search(search=search)

        nodes = []
        links = []
        res = {}

        for messageid in session.results:
            message = Email(self._idx(), messageid)
            try:
                # Only the first From: address is treated as the sender.
                msgfrom = ExtractEmails(message.get("from"))[0].lower()
            except IndexError, e:
                print "No e-mail address in '%s'" % message.get("from")
                continue

            msgto = [x.lower() for x in ExtractEmails(message.get("to"))]
            msgcc = [x.lower() for x in ExtractEmails(message.get("cc"))]
            msgbcc = [x.lower() for x in ExtractEmails(message.get("bcc"))]

            # Register the sender as a node if unseen so far.
            if msgfrom not in [m["email"] for m in nodes]:
                nodes.append({"email": msgfrom})

            for msgset in [msgto, msgcc, msgbcc]:
                # Register each recipient as a node if unseen so far.
                for address in msgset:
                    if address not in [m["email"] for m in nodes]:
                        nodes.append({"email": address})

                # Find (or create) the sender->recipient link and bump its
                # weight. NOTE(review): these linear scans make this
                # O(n^2) in the number of nodes/links.
                curnodes = [x["email"] for x in nodes]
                fromid = curnodes.index(msgfrom)
                searchspace = [m for m in links if m["source"] == fromid]
                for recipient in msgset:
                    index = curnodes.index(recipient)
                    link = [m for m in searchspace if m["target"] == index]
                    if len(link) == 0:
                        links.append({
                            "source": fromid,
                            "target": index,
                            "value": 1
                        })
                    elif len(link) == 1:
                        link[0]["value"] += 1
                    else:
                        raise ValueError(
                            "Too many links! - This should never happen.")

            if len(nodes) >= 200:
                # Let's put a hard upper limit on how many nodes we can have, for performance reasons.
                # There might be a better way to do this though...
                res["limit_hit"] = True
                break
Exemplo n.º 2
0
    def command(self, emails=None):
        session, config, idx = self.session, self.session.config, self._idx()
        args = list(self.args)

        bounce_to = []
        while args and '@' in args[-1]:
            bounce_to.append(args.pop(-1))
        for rcpt in (self.data.get('to', []) +
                     self.data.get('cc', []) +
                     self.data.get('bcc', [])):
            bounce_to.extend(ExtractEmails(rcpt))

        if not emails:
            args.extend(['=%s' % mid for mid in self.data.get('mid', [])])
            mids = self._choose_messages(args)
            emails = [Email(idx, i) for i in mids]

        # Process one at a time so we don't eat too much memory
        sent = []
        missing_keys = []
        for email in emails:
            try:
                msg_mid = email.get_msg_info(idx.MSG_MID)
                # FIXME: We are failing to capture error states with sufficient
                #        granularity, messages may be delivered to some
                #        recipients but not all...
                SendMail(session, [PrepareMessage(config,
                                                  email.get_msg(pgpmime=False),
                                                  rcpts=(bounce_to or None))])
                sent.append(email)
            except KeyLookupError, kle:
                session.ui.warning(_('Missing keys %s') % kle.missing)
                missing_keys.extend(kle.missing)
                self._ignore_exception()
            except:
Exemplo n.º 3
0
    def command(self, emails=None):
        session, config, idx = self.session, self.session.config, self._idx()

        bounce_to = []
        while self.args and '@' in self.args[-1]:
            bounce_to.append(self.args.pop(-1))
        for rcpt in (self.data.get('to', []) + self.data.get('cc', []) +
                     self.data.get('bcc', [])):
            bounce_to.extend(ExtractEmails(rcpt))

        args = self.args[:]
        if not emails:
            args.extend(['=%s' % mid for mid in self.data.get('mid', [])])
            mids = self._choose_messages(args)
            emails = [Email(idx, i) for i in mids]

        # Process one at a time so we don't eat too much memory
        sent = []
        missing_keys = []
        for email in emails:
            try:
                msg_mid = email.get_msg_info(idx.MSG_MID)
                SendMail(session, [
                    PrepareMessage(config,
                                   email.get_msg(pgpmime=False),
                                   rcpts=(bounce_to or None))
                ])
                sent.append(email)
            except KeyLookupError, kle:
                missing_keys.extend(kle.missing)
                self._ignore_exception()
            except:
Exemplo n.º 4
0
    def TransformOutgoing(self, sender, rcpts, msg, **kwargs):
        """Decorate an outgoing message with OpenPGP metadata.

        If the openpgp_header preference is set, look up the sender's
        secret key and add an OpenPGP: header advertising it. If the
        message carries an affirmative attach-pgp-pubkey header, attach
        the sender's public key(s) as application/pgp-keys parts.

        Returns (sender, rcpts, msg, matched, keep_going) for the
        outgoing-transform pipeline.
        """
        matched = False
        gnupg = None

        sender_keyid = None
        if self.config.prefs.openpgp_header:
            try:
                gnupg = gnupg or GnuPG(self.config)
                # Map e-mail address -> fingerprint for every secret key
                # that can both encrypt and sign.
                seckeys = dict([(uid["email"], fp) for fp, key
                                in gnupg.list_secret_keys().iteritems()
                                if key["capabilities_map"].get("encrypt")
                                and key["capabilities_map"].get("sign")
                                for uid in key["uids"]])
                sender_keyid = seckeys.get(sender)
            except (KeyError, TypeError, IndexError, ValueError):
                traceback.print_exc()

        if sender_keyid and self.config.prefs.openpgp_header:
            msg["OpenPGP"] = ("id=%s; preference=%s"
                              % (sender_keyid,
                                 self.config.prefs.openpgp_header))

        if ('attach-pgp-pubkey' in msg and
                msg['attach-pgp-pubkey'][:3].lower() in ('yes', 'tru')):
            # FIXME: Check attach_pgp_pubkey for instructions on which key(s)
            #        to attach. Attaching all of them may be a bit lame.
            gnupg = gnupg or GnuPG(self.config)
            keys = gnupg.address_to_keys(ExtractEmails(sender)[0])
            key_count = 0
            for fp, key in keys.iteritems():
                # Skip keys with no usable capabilities at all.
                if not any(key["capabilities_map"].values()):
                    continue
                # We should never really hit this more than once. But if we
                # do, should still be fine.
                keyid = key["keyid"]
                data = gnupg.get_pubkey(keyid)

                try:
                    from_name = key["uids"][0]["name"]
                    filename = _('Encryption key for %s.asc') % from_name
                except:
                    filename = _('My encryption key.asc')
                att = MIMEBase('application', 'pgp-keys')
                att.set_payload(data)
                encoders.encode_base64(att)
                # MIME-Version belongs on the top-level message only.
                del att['MIME-Version']
                att.add_header('Content-Id', MakeContentID())
                att.add_header('Content-Disposition', 'attachment',
                               filename=filename)
                att.signature_info = SignatureInfo(parent=msg.signature_info)
                att.encryption_info = EncryptionInfo(parent=msg.encryption_info)
                msg.attach(att)
                key_count += 1

            if key_count > 0:
                msg['x-mp-internal-pubkeys-attached'] = "Yes"

        return sender, rcpts, msg, matched, True
Exemplo n.º 5
0
  def __init__(self, session, idx,
               results=None, start=0, end=None, num=None, expand=None):
    """Build one page of search-result summaries.

    Slices `results` (or the session's current results) to at most `num`
    entries starting at `start` (or ending at `end`), then stores one
    summary dict per message via _set_values(). Messages whose Email
    objects appear in `expand` additionally get full message details.
    """
    dict.__init__(self)
    self.session = session
    self.expand = expand
    self.idx = idx

    results = results or session.results
    if not results:
      self._set_values([], 0, 0, 0)
      return

    terms = session.searched
    num = num or session.config.get('num_results', 20)
    # Clamp the requested window to the available results.
    if end: start = end - num
    if start > len(results): start = len(results)
    if start < 0: start = 0

    rv = []
    count = 0
    expand_ids = [e.msg_idx for e in (expand or [])]
    for mid in results[start:start+num]:
      count += 1
      msg_info = idx.get_msg_by_idx(mid)
      result = self._explain_msg_summary([
        msg_info[MailIndex.MSG_IDX],
        msg_info[MailIndex.MSG_ID],
        msg_info[MailIndex.MSG_FROM],
        msg_info[MailIndex.MSG_SUBJECT],
        msg_info[MailIndex.MSG_DATE],
        msg_info[MailIndex.MSG_TAGS].split(','),
        session.config.is_editable_message(msg_info[MailIndex.MSG_PTRS])
      ])
      # Only show tags that were not already part of the search terms.
      result['tags'] = sorted([idx.config['tag'].get(t,t)
                               for t in idx.get_tags(msg_info=msg_info)
                                     if 'tag:%s' % t not in terms])
      # Expanded view shows the message alone; otherwise summarize the
      # whole conversation it belongs to.
      if not expand:
        conv = idx.get_conversation(msg_info)
      else:
        conv = [msg_info]
      conv_from = [c[MailIndex.MSG_FROM] for c in conv]

      result['short_from'] = self._compact(self._names(conv_from), 25)
      result['conv_count'] = len(conv)
      result['conv_idxs'] = [c[MailIndex.MSG_IDX] for c in conv]
      # FIXME: conv_people should look stuff in our contact list
      result['conv_people'] = people = [{
        'email': (ExtractEmails(p) or [''])[0],
        'name': self._name(p),
      } for p in list(set(conv_from))]
      people.sort(key=lambda i: i['name']+i['email'])

      if expand and mid in expand_ids:
        exp_email = expand[expand_ids.index(mid)]
        result['message'] = self._message_details([exp_email])[0]
      rv.append(result)

    self._set_values(rv, start, count, len(results))
Exemplo n.º 6
0
    def index_email(self, session, email):
        """(Re-)index a single e-mail: keywords, snippet and tags.

        Extracts the message, runs it through the keyword indexer,
        rewrites the metadata line and re-applies any tags implied by
        the indexing keywords. Internal ('mp_') tags are reset first.
        """
        msg = email.get_msg()
        msg_info = email.get_msg_info()
        # The first message pointer's prefix identifies the mailbox.
        mbox_idx = msg_info[self.MSG_PTRS].split(',')[0][:MBX_ID_LEN]

        msg_subj = self.hdr(msg, 'subject')
        msg_to = ExtractEmails(self.hdr(msg, 'to'))
        msg_cc = (ExtractEmails(self.hdr(msg, 'cc')) +
                  ExtractEmails(self.hdr(msg, 'bcc')))

        kw, sn = self.index_message(session,
                                    email.msg_mid(),
                                    msg_info[self.MSG_ID],
                                    msg,
                                    email.get_msg_size(),
                                    long(msg_info[self.MSG_DATE], 36),
                                    mailbox=mbox_idx,
                                    compact=False,
                                    filter_hooks=[self.filter_keywords])

        # Keywords ending in ':in' or ':tag' name tags to (re-)apply.
        tags = [
            k.split(':')[0] for k in kw
            if k.endswith(':in') or k.endswith(':tag')
        ]

        # Trim the snippet so subject + snippet fit within snippet_max.
        snippet_max = session.config.sys.snippet_max
        self.edit_msg_info(msg_info,
                           msg_from=self.hdr(msg, 'from'),
                           msg_to=msg_to,
                           msg_cc=msg_cc,
                           msg_subject=msg_subj,
                           msg_body=sn[:max(0, snippet_max - len(msg_subj))])

        self.set_msg_at_idx_pos(email.msg_idx_pos, msg_info)

        # Reset the internal tags on this message
        for tag_id in [t for t in msg_info[self.MSG_TAGS].split(',') if t]:
            tag = session.config.get_tag(tag_id)
            if tag and tag.slug.startswith('mp_'):
                self.remove_tag(session, tag_id, msg_idxs=[email.msg_idx_pos])

        # Add normal tags implied by a rescan. Report progress via the
        # session UI (as the rest of this class does) instead of a stray
        # debug print to stdout.
        session.ui.mark('Applying %s' % tags)
        for tag_id in tags:
            self.add_tag(session, tag_id, msg_idxs=[email.msg_idx_pos])
Exemplo n.º 7
0
    def command(self, emails=None):
        session, config, idx = self.session, self.session.config, self._idx()

        bounce_to = []
        while self.args and '@' in self.args[-1]:
            bounce_to.append(self.args.pop(-1))
        for rcpt in (self.data.get('to', []) +
                     self.data.get('cc', []) +
                     self.data.get('bcc', [])):
            bounce_to.extend(ExtractEmails(rcpt))

        args = self.args[:]
        if not emails:
            args.extend(['=%s' % mid for mid in self.data.get('mid', [])])
            mids = self._choose_messages(args)
            emails = [Email(idx, i) for i in mids]

        # Process one at a time so we don't eat too much memory
        sent = []
        for email in emails:
            try:
                msg_mid = email.get_msg_info(idx.MSG_MID)
                SendMail(session, [PrepareMail(email,
                                               rcpts=(bounce_to or None))])
                sent.append(email)
            except:
                session.ui.error('Failed to send %s' % email)
                self._ignore_exception()

        if 'compose' in config.sys.debug:
            sys.stderr.write(('compose/Sendit: Send %s to %s (sent: %s)\n'
                              ) % (len(emails),
                                   bounce_to or '(header folks)', sent))

        if sent:
            if self.BLANK_TAG:
                self._untag_emails(sent, self.BLANK_TAG)
            if self.DRAFT_TAG:
                self._untag_emails(sent, self.DRAFT_TAG)
            if self.SENT_TAG:
                self._tag_emails(sent, self.SENT_TAG)
            for email in sent:
                idx.index_email(self.session, email)

            return self._return_search_results(sent, sent=sent)
        else:
            return self._error('Nothing was sent')
Exemplo n.º 8
0
 def _explain_msg_summary(self, info):
     """Convert a raw message-summary list into a dict for the UI.

     `info` holds, in order: mid, msg id, from, to, subject, snippet,
     date (a base-36 unix timestamp), tag ids and an editable flag.
     Editable (draft) messages additionally get an 'editing_url'.
     """
     msg_ts = long(info[6], 36)
     msg_date = datetime.datetime.fromtimestamp(msg_ts)
     urlmap = UrlMap(self.session)
     expl = {
         'mid': info[0],
         'id': info[1],
         'from': info[2],
         'from_email': ', '.join(ExtractEmails(info[2])),
         'to': info[3],
         'subject': info[4],
         'snippet': info[5],
         'timestamp': msg_ts,
         'shorttime': msg_date.strftime("%H:%M"),
         'date': msg_date.strftime("%Y-%m-%d"),
         'tag_ids': info[7],
         'url': urlmap.url_thread(info[0])
     }
     # Only editable (draft) messages get an editing URL.
     if info[8]:
         expl['editing_url'] = urlmap.url_edit(info[0])
     return expl
Exemplo n.º 9
0
    def command(self, emails=None):
        session, config, idx = self.session, self.session.config, self._idx()
        args = list(self.args)

        if self.session.config.sys.lockdown:
            return self._error(_('In lockdown, doing nothing.'))

        bounce_to = []
        while args and '@' in args[-1]:
            bounce_to.append(args.pop(-1))
        for rcpt in (self.data.get('to', []) + self.data.get('cc', []) +
                     self.data.get('bcc', [])):
            bounce_to.extend(ExtractEmails(rcpt))

        if not emails:
            args.extend(['=%s' % mid for mid in self.data.get('mid', [])])
            mids = self._choose_messages(args)
            emails = [Email(idx, i) for i in mids]

        # Process one at a time so we don't eat too much memory
        sent = []
        missing_keys = []
        for email in emails:
            events = []
            try:
                msg_mid = email.get_msg_info(idx.MSG_MID)

                # This is a unique sending-ID. This goes in the public (meant
                # for debugging help) section of the event-log, so we take
                # care to not reveal details about the message or recipients.
                msg_sid = sha1b64(email.get_msg_info(idx.MSG_ID),
                                  *sorted(bounce_to))[:8]

                # We load up any incomplete events for sending this message
                # to this set of recipients. If nothing is in flight, create
                # a new event for tracking this operation.
                events = list(
                    config.event_log.incomplete(source=self.EVENT_SOURCE,
                                                data_mid=msg_mid,
                                                data_sid=msg_sid))
                if not events:
                    events.append(
                        config.event_log.log(source=self.EVENT_SOURCE,
                                             flags=Event.RUNNING,
                                             message=_('Sending message'),
                                             data={
                                                 'mid': msg_mid,
                                                 'sid': msg_sid
                                             }))

                SendMail(session, msg_mid, [
                    PrepareMessage(config,
                                   email.get_msg(pgpmime=False),
                                   rcpts=(bounce_to or None),
                                   events=events)
                ])
                for ev in events:
                    ev.flags = Event.COMPLETE
                    config.event_log.log_event(ev)
                sent.append(email)
            except KeyLookupError, kle:
                # This is fatal, we don't retry
                message = _('Missing keys %s') % kle.missing
                for ev in events:
                    ev.flags = Event.COMPLETE
                    ev.message = message
                    config.event_log.log_event(ev)
                session.ui.warning(message)
                missing_keys.extend(kle.missing)
                self._ignore_exception()
            # FIXME: Also fatal, when the SMTP server REJECTS the mail
            except:
Exemplo n.º 10
0
    def TransformOutgoing(self, sender, rcpts, msg, **kwargs):
        """Decorate an outgoing message with OpenPGP metadata.

        Adds an OpenPGP: header advertising the sender's key and crypto
        preference (derived from the sender's VCard crypto_format, or
        from prefs.openpgp_header as a fallback), and attaches the
        sender's public key(s) when the message carries an affirmative
        attach-pgp-pubkey header.

        Returns (sender, rcpts, msg, matched, keep_going) for the
        outgoing-transform pipeline.
        """
        matched = False
        gnupg = None
        sender_keyid = None

        # Prefer to just get everything from the profile VCard, in the
        # common case...
        profile = self.config.vcards.get_vcard(sender)
        if profile:
            sender_keyid = profile.pgp_key
            crypto_format = profile.crypto_format or 'none'
        else:
            crypto_format = 'none'

        # Parse the openpgp_header data from the crypto_format
        openpgp_header = [
            p.split(':')[-1] for p in crypto_format.split('+')
            if p.startswith('openpgp_header:')
        ]
        if not openpgp_header:
            # BUG FIX: the old `prefs.openpgp_header and ['CFG']` left a
            # falsy non-list (e.g. '' or False) here when the preference
            # was unset, crashing on openpgp_header[0] below. Use a real
            # (possibly empty) list instead.
            openpgp_header = ['CFG'] if self.config.prefs.openpgp_header else []

        if openpgp_header and openpgp_header[0] != 'N' and not sender_keyid:
            # This is a fallback: this shouldn't happen much in normal use
            try:
                gnupg = gnupg or GnuPG(self.config, event=GetThreadEvent())
                # Map e-mail address -> fingerprint for every secret key
                # that can both encrypt and sign.
                seckeys = dict([
                    (uid["email"], fp)
                    for fp, key in gnupg.list_secret_keys().iteritems()
                    if key["capabilities_map"].get("encrypt")
                    and key["capabilities_map"].get("sign")
                    for uid in key["uids"]
                ])
                sender_keyid = seckeys.get(sender)
            except (KeyError, TypeError, IndexError, ValueError):
                traceback.print_exc()

        if sender_keyid and openpgp_header:
            preference = {
                'ES': 'signencrypt',
                'SE': 'signencrypt',
                'E': 'encrypt',
                'S': 'sign',
                'N': 'unprotected',
                'CFG': self.config.prefs.openpgp_header
            }[openpgp_header[0].upper()]
            msg["OpenPGP"] = ("id=%s; preference=%s" %
                              (sender_keyid, preference))

        if ('attach-pgp-pubkey' in msg
                and msg['attach-pgp-pubkey'][:3].lower() in ('yes', 'tru')):
            gnupg = gnupg or GnuPG(self.config, event=GetThreadEvent())
            # Prefer the key we already identified; otherwise look keys
            # up by the sender's e-mail address.
            if sender_keyid:
                keys = gnupg.list_keys(selectors=[sender_keyid])
            else:
                keys = gnupg.address_to_keys(ExtractEmails(sender)[0])

            key_count = 0
            for fp, key in keys.iteritems():
                # Skip keys with no usable capabilities at all.
                if not any(key["capabilities_map"].values()):
                    continue
                # We should never really hit this more than once. But if we
                # do, should still be fine.
                keyid = key["keyid"]
                data = gnupg.get_pubkey(keyid)

                try:
                    from_name = key["uids"][0]["name"]
                    filename = _('Encryption key for %s.asc') % from_name
                except:
                    filename = _('My encryption key.asc')
                att = MIMEBase('application', 'pgp-keys')
                att.set_payload(data)
                encoders.encode_base64(att)
                # MIME-Version belongs on the top-level message only.
                del att['MIME-Version']
                att.add_header('Content-Id', MakeContentID())
                att.add_header('Content-Disposition',
                               'attachment',
                               filename=filename)
                att.signature_info = SignatureInfo(parent=msg.signature_info)
                att.encryption_info = EncryptionInfo(
                    parent=msg.encryption_info)
                msg.attach(att)
                key_count += 1

            if key_count > 0:
                msg['x-mp-internal-pubkeys-attached'] = "Yes"

        return sender, rcpts, msg, matched, True
Exemplo n.º 11
0
    def command(self, emails=None):
        session, config, idx = self.session, self.session.config, self._idx()
        args = list(self.args)

        bounce_to = []
        while args and '@' in args[-1]:
            bounce_to.append(args.pop(-1))
        for rcpt in (self.data.get('to', []) + self.data.get('cc', []) +
                     self.data.get('bcc', [])):
            bounce_to.extend(ExtractEmails(rcpt))

        sender = self.data.get('from', [None])[0]
        if not sender and bounce_to:
            sender = idx.config.get_profile().get('email', None)

        if not emails:
            args.extend(['=%s' % mid for mid in self.data.get('mid', [])])
            emails = [
                self._actualize_ephemeral(i)
                for i in self._choose_messages(args, allow_ephemeral=True)
            ]

        # First make sure the draft tags are all gone, so other edits either
        # fail or complete while we wait for the lock.
        with GLOBAL_EDITING_LOCK:
            self._tag_drafts(emails, untag=True)
            self._tag_blank(emails, untag=True)

        # Process one at a time so we don't eat too much memory
        sent = []
        missing_keys = []
        locked_keys = []
        for email in emails:
            events = []
            try:
                msg_mid = email.get_msg_info(idx.MSG_MID)

                # This is a unique sending-ID. This goes in the public (meant
                # for debugging help) section of the event-log, so we take
                # care to not reveal details about the message or recipients.
                msg_sid = sha1b64(email.get_msg_info(idx.MSG_ID),
                                  *sorted(bounce_to))[:8]

                # We load up any incomplete events for sending this message
                # to this set of recipients. If nothing is in flight, create
                # a new event for tracking this operation.
                events = list(
                    config.event_log.incomplete(source=self.EVENT_SOURCE,
                                                data_mid=msg_mid,
                                                data_sid=msg_sid))
                if not events:
                    events.append(
                        config.event_log.log(source=self.EVENT_SOURCE,
                                             flags=Event.RUNNING,
                                             message=_('Sending message'),
                                             data={
                                                 'mid': msg_mid,
                                                 'sid': msg_sid
                                             }))

                SendMail(session, msg_mid, [
                    PrepareMessage(config,
                                   email.get_msg(pgpmime=False),
                                   sender=sender,
                                   rcpts=(bounce_to or None),
                                   bounce=(True if bounce_to else False),
                                   events=events)
                ])
                for ev in events:
                    ev.flags = Event.COMPLETE
                    config.event_log.log_event(ev)
                sent.append(email)

            # Encryption related failures are fatal, don't retry
            except (KeyLookupError, EncryptionFailureError,
                    SignatureFailureError), exc:
                message = unicode(exc)
                session.ui.warning(message)
                if hasattr(exc, 'missing_keys'):
                    missing_keys.extend(exc.missing)
                if hasattr(exc, 'from_key'):
                    # FIXME: We assume signature failures happen because
                    # the key is locked. Are there any other reasons?
                    locked_keys.append(exc.from_key)
                for ev in events:
                    ev.flags = Event.COMPLETE
                    ev.message = message
                    config.event_log.log_event(ev)
                self._ignore_exception()

            # FIXME: Also fatal, when the SMTP server REJECTS the mail
            except:
Exemplo n.º 12
0
class MailIndex:
    """This is a lazily parsing object representing a mailpile index."""

    # Field offsets within one tab-separated metadata line (see l2m/m2l).
    MSG_MID = 0       # message ID within this index (base-36)
    MSG_PTRS = 1      # comma-separated mailbox pointers (mailbox prefix first)
    MSG_ID = 2        # Message-ID header (normalized)
    MSG_DATE = 3      # base-36 unix timestamp
    MSG_FROM = 4
    MSG_TO = 5
    MSG_SUBJECT = 6
    MSG_SNIPPET = 7
    MSG_TAGS = 8      # comma-separated tag IDs
    MSG_REPLIES = 9
    MSG_CONV_MID = 10 # conversation's message ID
    def __init__(self, config):
        """Create an empty, not-yet-loaded index bound to `config`."""
        self.config = config
        self.STATS = {}        # indexing statistics (shape not shown here)
        self.INDEX = []        # raw metadata lines; list position == msg idx
        self.INDEX_SORT = {}   # cached sort orders (see cache_sort_orders)
        self.INDEX_CONV = []   # conversation data (populated elsewhere)
        self.PTRS = {}         # mailbox pointer -> index position
        self.MSGIDS = {}       # Message-ID -> index position
        self.EMAILS = []       # known addresses; list position == e-mail id
        self.EMAIL_IDS = {}    # lowercased address -> e-mail id
        self.CACHE = {}
        self.MODIFIED = set()  # index positions changed since last save
        self.EMAILS_SAVED = 0  # how many EMAILS entries are already on disk

    def l2m(self, line):
        return line.decode('utf-8').split(u'\t')

    # A translation table for message parts stored in the index, consists of
    # a mapping from unicode ordinals to either another unicode ordinal or
    # None, to remove a character. By default it removes the ASCII control
    # characters and replaces tabs and newlines with spaces.
    NORM_TABLE = dict(
        [(i, None) for i in range(0, 0x20)], **{
            ord(u'\t'): ord(u' '),
            ord(u'\r'): ord(u' '),
            ord(u'\n'): ord(u' '),
            0x7F: None
        })

    def m2l(self, message):
        """Message-to-line: encode fields as one tab-separated UTF-8 line.

        Inverse of l2m(); tabs/newlines inside fields are normalized to
        spaces so the line structure survives a round-trip.
        """
        # Normalize the message before saving it so we can be sure that we will
        # be able to read it back later.
        parts = [unicode(p).translate(self.NORM_TABLE) for p in message]
        return (u'\t'.join(parts)).encode('utf-8')

    def load(self, session=None):
        """Load (or reload) the metadata index file from disk.

        Rebuilds INDEX, PTRS, MSGIDS, EMAILS and EMAIL_IDS from scratch.
        Lines starting with '#' are comments, '@' lines define e-mail
        addresses, anything else is a message metadata line. GPG-encrypted
        sections are decrypted on the fly. A missing file only warns.
        """
        self.INDEX = []
        self.CACHE = {}
        self.PTRS = {}
        self.MSGIDS = {}
        self.EMAILS = []
        self.EMAIL_IDS = {}
        CachedSearchResultSet.DropCaches()

        def process_line(line):
            # Parse one line of the index file; malformed lines are skipped.
            try:
                line = line.strip()
                if line.startswith('#'):
                    pass
                elif line.startswith('@'):
                    # E-mail address definition: "@<pos36>\t<quoted address>".
                    pos, email = line[1:].split('\t', 1)
                    pos = int(pos, 36)
                    while len(self.EMAILS) < pos + 1:
                        self.EMAILS.append('')
                    self.EMAILS[pos] = unquote(email)
                    self.EMAIL_IDS[unquote(email).lower()] = pos
                elif line:
                    words = line.split('\t')
                    # FIXME: Delete this old crap.
                    if len(words) == 10:
                        # This is an old index file, reorder it!
                        pos, p, unused, msgid, d, f, s, t, r, c = words
                        ptrs = ','.join(['0' + ptr for ptr in p.split(',')])
                        line = '\t'.join(
                            [pos, ptrs, msgid, d, f, '', s, '', t, r, c])
                    else:
                        pos, ptrs, msgid = words[:3]
                    pos = int(pos, 36)
                    while len(self.INDEX) < pos + 1:
                        self.INDEX.append('')
                    self.INDEX[pos] = line
                    self.MSGIDS[msgid] = pos
                    for msg_ptr in ptrs.split(','):
                        self.PTRS[msg_ptr] = pos
            except ValueError:
                pass

        if session:
            session.ui.mark('Loading metadata index...')
        try:
            fd = open(self.config.mailindex_file(), 'r')
            for line in fd:
                # Inline GPG blocks are decrypted and parsed line by line.
                if line.startswith(GPG_BEGIN_MESSAGE):
                    for line in decrypt_gpg([line], fd):
                        process_line(line)
                else:
                    process_line(line)
            fd.close()
        except IOError:
            if session:
                session.ui.warning(('Metadata index not found: %s') %
                                   self.config.mailindex_file())
        self.cache_sort_orders(session)
        if session:
            session.ui.mark('Loaded metadata, %d messages' % len(self.INDEX))
        self.EMAILS_SAVED = len(self.EMAILS)

    def save_changes(self, session=None):
        """Append only what changed (new addresses + modified lines).

        Cheaper than save(): writes in append mode and clears MODIFIED.
        Does nothing when there is nothing new to write.
        """
        mods, self.MODIFIED = self.MODIFIED, set()
        have_new_emails = len(self.EMAILS) > self.EMAILS_SAVED
        if not mods and not have_new_emails:
            return

        if session:
            session.ui.mark("Saving metadata index changes...")
        fd = gpg_open(self.config.mailindex_file(),
                      self.config.prefs.gpg_recipient, 'a')
        # New e-mail addresses first, then every modified metadata line.
        for eid in range(self.EMAILS_SAVED, len(self.EMAILS)):
            fd.write('@%s\t%s\n' % (b36(eid), quote(self.EMAILS[eid])))
        for pos in mods:
            fd.write(self.INDEX[pos] + '\n')
        fd.close()
        flush_append_cache()
        if session:
            session.ui.mark("Saved metadata index changes")
        self.EMAILS_SAVED = len(self.EMAILS)

    def save(self, session=None):
        """Write the complete metadata index to disk, clearing MODIFIED."""
        self.MODIFIED = set()
        if session:
            session.ui.mark("Saving metadata index...")

        fd = gpg_open(self.config.mailindex_file(),
                      self.config.prefs.gpg_recipient, 'w')
        fd.write('# This is the mailpile.py index file.\n')
        fd.write('# We have %d messages!\n' % len(self.INDEX))
        # All known e-mail addresses first, then every metadata line.
        for eid, addr in enumerate(self.EMAILS):
            fd.write('@%s\t%s\n' % (b36(eid), quote(addr)))
        for item in self.INDEX:
            fd.write(item + '\n')
        fd.close()

        flush_append_cache()
        if session:
            session.ui.mark("Saved metadata index")

    def update_ptrs_and_msgids(self, session):
        session.ui.mark('Updating high level indexes')
        for offset in range(0, len(self.INDEX)):
            message = self.l2m(self.INDEX[offset])
            if len(message) > self.MSG_CONV_MID:
                self.MSGIDS[message[self.MSG_ID]] = offset
                for msg_ptr in message[self.MSG_PTRS].split(','):
                    self.PTRS[msg_ptr] = offset
            else:
                session.ui.warning('Bogus line: %s' % line)

    def try_decode(self, text, charset):
        for cs in (charset, 'iso-8859-1', 'utf-8'):
            if cs:
                try:
                    return text.decode(cs)
                except (UnicodeEncodeError, UnicodeDecodeError, LookupError):
                    pass
        return "".join(i for i in text if ord(i) < 128)

    def hdr(self, msg, name, value=None):
        """Return a decoded, whitespace-normalized header value.

        Reads header `name` from `msg` unless an explicit `value` is
        given. CR/LF/TAB are collapsed to spaces; returns '' when the
        header cannot be parsed at all.
        """
        try:
            if value is None and msg:
                # Security: RFC822 headers are not allowed to have (unencoded)
                # non-ascii characters in them, so we just strip them all out
                # before parsing.
                # FIXME: This is "safe", but can we be smarter/gentler?
                value = CleanText(msg[name], replace='_').clean
            # Note: decode_header does the wrong thing with "quoted" data.
            decoded = email.header.decode_header((value
                                                  or '').replace('"', ''))
            return (' '.join([self.try_decode(t[0], t[1]) for t in decoded
                              ])).replace('\r',
                                          ' ').replace('\t',
                                                       ' ').replace('\n', ' ')
        except email.errors.HeaderParseError:
            return ''

    def update_location(self, session, msg_idx_pos, msg_ptr):
        """Record msg_ptr as a known location of the message at msg_idx_pos."""
        msg_info = self.get_msg_at_idx_pos(msg_idx_pos)
        msg_ptrs = msg_info[self.MSG_PTRS].split(',')
        self.PTRS[msg_ptr] = msg_idx_pos

        # Pointers share a mailbox-ID prefix; if this mailbox was already
        # known, replace the stale pointer in place.
        replaced = False
        for n, known in enumerate(msg_ptrs):
            if known[:MBX_ID_LEN] == msg_ptr[:MBX_ID_LEN]:
                msg_ptrs[n] = msg_ptr
                replaced = True
                break
        # Otherwise this is a new mailbox: record this sighting as well.
        if not replaced:
            msg_ptrs.append(msg_ptr)

        msg_info[self.MSG_PTRS] = ','.join(msg_ptrs)
        self.set_msg_at_idx_pos(msg_idx_pos, msg_info)

    def _parse_date(self, date_hdr):
        """Parse a Date: or Received: header into a unix timestamp."""
        try:
            # Received: headers put the date after the final semicolon.
            if ';' in date_hdr:
                date_hdr = date_hdr.split(';')[-1].strip()
            ts = long(rfc822.mktime_tz(rfc822.parsedate_tz(date_hdr)))
            # Reject nonsense values and dates over a day in the future.
            if ts < 1 or ts > (time.time() + 24 * 3600):
                return None
            return ts
        except (ValueError, TypeError, OverflowError):
            return None

    def _extract_date_ts(self, session, msg_mid, msg_id, msg, last_date):
        """Extract a date, sanity checking against the Received: headers.

        Returns the Date: header's timestamp when it is within a month of
        the median Received: timestamp; otherwise the median.  If nothing
        parses, assumes chronological mailbox order and returns
        last_date + 1.
        """
        hdrs = [self.hdr(msg, 'date')] + (msg.get_all('received') or [])
        dates = [self._parse_date(date_hdr) for date_hdr in hdrs]
        msg_ts = dates[0]
        nz_dates = sorted([d for d in dates if d])

        if nz_dates:
            # Floor division keeps this correct under Python 3 as well.
            median = nz_dates[len(nz_dates) // 2]
            if msg_ts and abs(msg_ts - median) < 31 * 24 * 3600:
                return msg_ts
            else:
                # FIX: corrected the "Recieved" misspelling in the warning,
                # matching the wording used elsewhere in this file.
                session.ui.warning(
                    ('=%s/%s using Received: instead of Date:') %
                    (msg_mid, msg_id))
                return median
        else:
            # If the above fails, we assume the messages in the mailbox are in
            # chronological order and just add 1 second to the date of the last
            # message if date parsing fails for some reason.
            session.ui.warning('=%s/%s has a bogus date' % (msg_mid, msg_id))
            return last_date + 1

    def scan_mailbox(self, session, mailbox_idx, mailbox_fn, mailbox_opener):
        """Scan one mailbox for new or relocated messages and index them.

        Returns the number of messages added or relocated; 0 if the mailbox
        was skipped, unreadable, or had nothing unparsed.
        """
        try:
            mbox = mailbox_opener(session, mailbox_idx)
            if mbox.editable:
                # Editable mailboxes are not indexed by this scan.
                session.ui.mark('%s: Skipped: %s' % (mailbox_idx, mailbox_fn))
                return 0
            else:
                session.ui.mark('%s: Checking: %s' % (mailbox_idx, mailbox_fn))
        except (IOError, OSError, NoSuchMailboxError), e:
            session.ui.mark(
                ('%s: Error opening: %s (%s)') % (mailbox_idx, mailbox_fn, e))
            return 0

        unparsed = mbox.unparsed()
        if not unparsed:
            return 0

        # Lazily (re)build the pointer/message-id lookups on first use.
        if len(self.PTRS.keys()) == 0:
            self.update_ptrs_and_msgids(session)

        snippet_max = session.config.sys.snippet_max
        added = 0
        msg_ts = int(time.time())
        for ui in range(0, len(unparsed)):
            if mailpile.util.QUITTING:
                break

            i = unparsed[ui]
            parse_status = ('%s: Reading your mail: %d%% (%d/%d messages)') % (
                mailbox_idx, 100 * ui / len(unparsed), ui, len(unparsed))

            msg_ptr = mbox.get_msg_ptr(mailbox_idx, i)
            if msg_ptr in self.PTRS:
                # Already indexed at this location; update the UI only
                # occasionally to keep the scan fast.
                if (ui % 317) == 0:
                    session.ui.mark(parse_status)
                continue
            else:
                session.ui.mark(parse_status)

            # Message new or modified, let's parse it.
            msg = ParseMessage(mbox.get_file(i), pgpmime=False)
            msg_id = b64c(
                sha1b64((self.hdr(msg, 'message-id') or msg_ptr).strip()))
            if msg_id in self.MSGIDS:
                # Known message seen at a new location: record the pointer.
                self.update_location(session, self.MSGIDS[msg_id], msg_ptr)
                added += 1
            else:
                # Add new message!
                msg_mid = b36(len(self.INDEX))

                msg_ts = self._extract_date_ts(session, msg_mid, msg_id, msg,
                                               msg_ts)

                keywords, snippet = self.index_message(
                    session,
                    msg_mid,
                    msg_id,
                    msg,
                    msg_ts,
                    mailbox=mailbox_idx,
                    compact=False,
                    filter_hooks=[self.filter_keywords])

                msg_subject = self.hdr(msg, 'subject')
                # Budget the snippet so subject + snippet fit snippet_max.
                msg_snippet = snippet[:max(0, snippet_max - len(msg_subject))]

                tags = [
                    k.split(':')[0] for k in keywords if k.endswith(':tag')
                ]

                msg_to = (ExtractEmails(self.hdr(msg, 'to')) +
                          ExtractEmails(self.hdr(msg, 'cc')) +
                          ExtractEmails(self.hdr(msg, 'bcc')))

                msg_idx_pos, msg_info = self.add_new_msg(
                    msg_ptr, msg_id, msg_ts, self.hdr(msg, 'from'), msg_to,
                    msg_subject, msg_snippet, tags)
                self.set_conversation_ids(msg_info[self.MSG_MID], msg)
                mbox.mark_parsed(i)

                added += 1
                # Periodically compact posting lists during long scans.
                if (added % 1000) == 0:
                    GlobalPostingList.Optimize(session, self, quick=True)

        if added:
            mbox.save(session)
        session.ui.mark('%s: Indexed mailbox: %s' % (mailbox_idx, mailbox_fn))
        return added
Exemplo n.º 13
0
    def read_message(self,
                     session,
                     msg_mid,
                     msg_id,
                     msg,
                     msg_ts,
                     mailbox=None):
        """Walk a parsed message and extract search keywords and a snippet.

        Returns a (keywords, snippet) tuple, where keywords is a set with
        the STOPLIST removed and snippet is whitespace-normalized text.
        """
        keywords = []
        snippet = ''
        # One-element list so the _loader closure can cache the decoded
        # payload of the part currently being processed.
        payload = [None]
        for part in msg.walk():
            textpart = payload[0] = None
            ctype = part.get_content_type()
            charset = part.get_charset() or 'iso-8859-1'

            def _loader(p):
                # Decode the part's payload at most once per part.
                if payload[0] is None:
                    payload[0] = self.try_decode(p.get_payload(None, True),
                                                 charset)
                return payload[0]

            if ctype == 'text/plain':
                textpart = _loader(part)
            elif ctype == 'text/html':
                _loader(part)
                if len(payload[0]) > 3:
                    try:
                        textpart = lxml.html.fromstring(
                            payload[0]).text_content()
                    except:
                        session.ui.warning(
                            ('=%s/%s has bogus HTML.') % (msg_mid, msg_id))
                        textpart = payload[0]
                else:
                    textpart = payload[0]
            elif 'pgp' in part.get_content_type():
                keywords.append('pgp:has')

            att = part.get_filename()
            if att:
                # Attachment filenames are indexed with an ":att" suffix.
                att = self.try_decode(att, charset)
                keywords.append('attachment:has')
                keywords.extend(
                    [t + ':att' for t in re.findall(WORD_REGEXP, att.lower())])
                textpart = (textpart or '') + ' ' + att

            if textpart:
                # FIXME: Does this lowercase non-ASCII characters correctly?
                # FIXME: What about encrypted content?
                keywords.extend(re.findall(WORD_REGEXP, textpart.lower()))
                # FIXME: Do this better.
                if ('-----BEGIN PGP' in textpart
                        and '-----END PGP' in textpart):
                    keywords.append('pgp:has')
                for extract in plugins.get_text_kw_extractors():
                    keywords.extend(extract(self, msg, ctype,
                                            lambda: textpart))

                if len(snippet) < 1024:
                    snippet += ' ' + textpart

            for extract in plugins.get_data_kw_extractors():
                keywords.extend(
                    extract(self, msg, ctype, att, part,
                            lambda: _loader(part)))

        keywords.append('%s:id' % msg_id)
        keywords.extend(
            re.findall(WORD_REGEXP,
                       self.hdr(msg, 'subject').lower()))
        keywords.extend(re.findall(WORD_REGEXP, self.hdr(msg, 'from').lower()))
        if mailbox:
            keywords.append('%s:mailbox' % mailbox.lower())
        keywords.append('%s:hprint' % HeaderPrint(msg))

        # Index words and e-mail addresses from all non-boring headers.
        for key in msg.keys():
            key_lower = key.lower()
            if key_lower not in BORING_HEADERS:
                emails = ExtractEmails(self.hdr(msg, key).lower())
                words = set(re.findall(WORD_REGEXP,
                                       self.hdr(msg, key).lower()))
                words -= STOPLIST
                keywords.extend(['%s:%s' % (t, key_lower) for t in words])
                keywords.extend(['%s:%s' % (e, key_lower) for e in emails])
                keywords.extend(['%s:email' % e for e in emails])
                if 'list' in key_lower:
                    keywords.extend(['%s:list' % t for t in words])

        for extract in plugins.get_meta_kw_extractors():
            keywords.extend(extract(self, msg_mid, msg, msg_ts))

        snippet = snippet.replace('\n', ' ').replace('\t',
                                                     ' ').replace('\r', '')
        return (set(keywords) - STOPLIST), snippet.strip()
Exemplo n.º 14
0
 def _fparse(self, fromdata):
     """Split a From:-style string into (email, display-name-or-email)."""
     addr = ExtractEmails(fromdata)[0]
     label = fromdata.replace(addr, '').replace('<>', '').strip()
     return addr, (label or addr)
Exemplo n.º 15
0
    def __init__(self,
                 session,
                 idx,
                 results=None,
                 start=0,
                 end=None,
                 num=None,
                 expand=None):
        """Build a dict of displayable search results.

        Slices `results` (defaulting to the session's current results) to
        the requested window and collects per-message summaries, tag info
        and conversation data.  `expand` is a list of Email objects whose
        full message details should be inlined into their results.
        """
        dict.__init__(self)
        self.session = session
        self.expand = expand
        self.idx = idx

        self['search_terms'] = terms = session.searched
        if 'tags' in idx.config:
            # Resolve any "in:<tag>" search terms to their tag definitions.
            self['search_tags'] = [
                idx.config.get_tag(t.split(':')[1], {}) for t in terms
                if t.startswith('in:')
            ]

        results = results or session.results
        if not results:
            self._set_values([], 0, 0, 0)
            return

        # Clamp the display window [start, start+num) to the result list.
        num = num or session.config.prefs.num_results
        if end:
            start = end - num
        if start > len(results):
            start = len(results)
        if start < 0:
            start = 0

        rv = []
        count = 0
        expand_ids = [e.msg_idx_pos for e in (expand or [])]
        for idx_pos in results[start:start + num]:
            count += 1
            msg_info = idx.get_msg_at_idx_pos(idx_pos)
            result = self._explain_msg_summary([
                msg_info[MailIndex.MSG_MID], msg_info[MailIndex.MSG_ID],
                msg_info[MailIndex.MSG_FROM],
                idx.expand_to_list(msg_info), msg_info[MailIndex.MSG_SUBJECT],
                msg_info[MailIndex.MSG_SNIPPET], msg_info[MailIndex.MSG_DATE],
                msg_info[MailIndex.MSG_TAGS].split(','),
                session.config.is_editable_message(msg_info)
            ])
            # FIXME: This is nice, but doing it in _explain_msg_summary
            #                would be nicer.
            result['tags'] = []
            if 'tags' in idx.config:
                # Mark each of the message's tags as searched/not-searched.
                searched = [t.get('slug') for t in self['search_tags']]
                for t in sorted(idx.get_tags(msg_info=msg_info)):
                    tag = idx.config.get_tag(t)
                    if tag:
                        result['tags'].append(
                            dict_merge(tag,
                                       {'searched':
                                        (tag['slug'] in searched)}))

            # Unless expanding or in "flat" order, show whole conversations.
            if not expand and 'flat' not in (session.order or ''):
                conv = idx.get_conversation(msg_info)
            else:
                conv = [msg_info]
            conv_from = [c[MailIndex.MSG_FROM] for c in conv]

            result['short_from'] = self._compact(self._names(conv_from), 25)
            result['conv_count'] = len(conv)
            result['conv_mids'] = [c[MailIndex.MSG_MID] for c in conv]
            # FIXME: conv_people should look stuff in our contact list
            result['conv_people'] = people = [{
                'email': (ExtractEmails(p) or [''])[0],
                'name':
                self._name(p, short=False),
            } for p in list(set(conv_from))]
            people.sort(key=lambda i: i['name'] + i['email'])

            if expand and idx_pos in expand_ids:
                exp_email = expand[expand_ids.index(idx_pos)]
                result['message'] = self._message_details([exp_email])[0]
            rv.append(result)

        self._set_values(rv, start, count, len(results))
Exemplo n.º 16
0
    def read_message(self,
                     session,
                     msg_mid,
                     msg_id,
                     msg,
                     msg_size,
                     msg_ts,
                     mailbox=None):
        """Walk a parsed message and extract search keywords and a snippet.

        Returns a (keywords, snippet) tuple, where keywords is a set with
        the STOPLIST removed.  Also evaluates PGP state (and optionally
        indexes decrypted contents) when crypto content is detected.
        """
        keywords = []
        snippet = ''
        # One-element list so the _loader closure can cache the decoded
        # payload of the part currently being processed.
        payload = [None]
        for part in msg.walk():
            textpart = payload[0] = None
            ctype = part.get_content_type()
            charset = part.get_content_charset() or 'iso-8859-1'

            def _loader(p):
                # Decode the part's payload at most once per part.
                if payload[0] is None:
                    payload[0] = self.try_decode(p.get_payload(None, True),
                                                 charset)
                return payload[0]

            if ctype == 'text/plain':
                textpart = _loader(part)
            elif ctype == 'text/html':
                _loader(part)
                if len(payload[0]) > 3:
                    try:
                        textpart = lxml.html.fromstring(
                            payload[0]).text_content()
                    except:
                        session.ui.warning(
                            _('=%s/%s has bogus HTML.') % (msg_mid, msg_id))
                        textpart = payload[0]
                else:
                    textpart = payload[0]
            elif 'pgp' in part.get_content_type():
                keywords.append('pgp:has')

            att = part.get_filename()
            if att:
                # Attachment filenames are indexed with an ":att" suffix.
                att = self.try_decode(att, charset)
                keywords.append('attachment:has')
                keywords.extend(
                    [t + ':att' for t in re.findall(WORD_REGEXP, att.lower())])
                textpart = (textpart or '') + ' ' + att

            if textpart:
                # FIXME: Does this lowercase non-ASCII characters correctly?
                keywords.extend(re.findall(WORD_REGEXP, textpart.lower()))

                # NOTE: As a side effect here, the cryptostate plugin will
                #       add a 'crypto:has' keyword which we check for below
                #       before performing further processing.
                for kwe in plugins.get_text_kw_extractors():
                    keywords.extend(kwe(self, msg, ctype, textpart))

                if len(snippet) < 1024:
                    snippet += ' ' + textpart

            for extract in plugins.get_data_kw_extractors():
                keywords.extend(
                    extract(self, msg, ctype, att, part,
                            lambda: _loader(part)))

        if 'crypto:has' in keywords:
            # Evaluate the message's PGP state via a throwaway Email object,
            # recording signature/encryption info on the message itself.
            e = Email(self, -1)
            e.msg_parsed = msg
            e.msg_info = self.BOGUS_METADATA[:]
            tree = e.get_message_tree(want=(e.WANT_MSG_TREE_PGP +
                                            ('text_parts', )))

            # Look for inline PGP parts, update our status if found
            e.evaluate_pgp(tree, decrypt=session.config.prefs.index_encrypted)
            msg.signature_info = tree['crypto']['signature']
            msg.encryption_info = tree['crypto']['encryption']

            # Index the contents, if configured to do so
            if session.config.prefs.index_encrypted:
                for text in [t['data'] for t in tree['text_parts']]:
                    keywords.extend(re.findall(WORD_REGEXP, text.lower()))
                    for kwe in plugins.get_text_kw_extractors():
                        keywords.extend(kwe(self, msg, 'text/plain', text))

        keywords.append('%s:id' % msg_id)
        keywords.extend(
            re.findall(WORD_REGEXP,
                       self.hdr(msg, 'subject').lower()))
        keywords.extend(re.findall(WORD_REGEXP, self.hdr(msg, 'from').lower()))
        if mailbox:
            keywords.append('%s:mailbox' % mailbox.lower())
        keywords.append('%s:hp' % HeaderPrint(msg))

        # Index words and e-mail addresses from all non-boring headers.
        for key in msg.keys():
            key_lower = key.lower()
            if key_lower not in BORING_HEADERS:
                emails = ExtractEmails(self.hdr(msg, key).lower())
                words = set(re.findall(WORD_REGEXP,
                                       self.hdr(msg, key).lower()))
                words -= STOPLIST
                keywords.extend(['%s:%s' % (t, key_lower) for t in words])
                keywords.extend(['%s:%s' % (e, key_lower) for e in emails])
                keywords.extend(['%s:email' % e for e in emails])
                if 'list' in key_lower:
                    keywords.extend(['%s:list' % t for t in words])
        # Flag expected-but-missing headers for searchability.
        for key in EXPECTED_HEADERS:
            if not msg[key]:
                keywords.append('%s:missing' % key)

        for extract in plugins.get_meta_kw_extractors():
            keywords.extend(extract(self, msg_mid, msg, msg_size, msg_ts))

        snippet = snippet.replace('\n', ' ').replace('\t',
                                                     ' ').replace('\r', '')
        return (set(keywords) - STOPLIST), snippet.strip()
Exemplo n.º 17
0
class MailIndex:
    """This is a lazily parsing object representing a mailpile index."""

    # Offsets of the fields within one tab-separated metadata line
    # (see l2m()/m2l() below).
    MSG_MID = 0
    MSG_PTRS = 1  # Comma-separated mailbox pointers
    MSG_ID = 2  # Hashed Message-ID
    MSG_DATE = 3
    MSG_FROM = 4
    MSG_TO = 5
    MSG_CC = 6  # Added in the V1 -> V2 migration
    MSG_KB = 7  # Added in the V1 -> V2 migration
    MSG_SUBJECT = 8
    MSG_BODY = 9
    MSG_TAGS = 10  # Comma-separated tag IDs
    MSG_REPLIES = 11
    MSG_THREAD_MID = 12

    # Field counts of the old (V1) and current (V2) line formats.
    MSG_FIELDS_V1 = 11
    MSG_FIELDS_V2 = 13

    # Placeholder metadata (V2 field count) for messages not in the index.
    BOGUS_METADATA = [
        None, '', None, '0', '(no sender)', '', '', '0', '(not in index)', '',
        '', '', '-1'
    ]

    # After this many incremental save_changes() calls, do a full save().
    MAX_INCREMENTAL_SAVES = 25

    def __init__(self, config):
        """Create an empty, unloaded index bound to the given config."""
        self.config = config
        self.INDEX = []  # Raw metadata lines, one per message
        self.INDEX_SORT = {}  # Sort-order caches (see cache_sort_orders)
        self.INDEX_THR = []
        self.PTRS = {}  # Mailbox pointer -> index position
        self.TAGS = {}  # Tag ID -> set of index positions
        self.MSGIDS = {}  # Hashed Message-ID -> index position
        self.EMAILS = []  # Known e-mail addresses, by numeric position
        self.EMAIL_IDS = {}  # Lowercased address -> position in EMAILS
        self.CACHE = {}
        self.MODIFIED = set()  # Index positions changed since last save
        self.EMAILS_SAVED = 0  # How many of EMAILS are already on disk
        self._saved_changes = 0  # Incremental saves since last full save()
        self._lock = threading.Lock()  # Guards load/save file access

    def l2m(self, line):
        """Convert a raw index line into its list-of-fields form."""
        decoded = line.decode('utf-8')
        return decoded.split(u'\t')

    # A translation table for message parts stored in the index, consists of
    # a mapping from unicode ordinals to either another unicode ordinal or
    # None, to remove a character. By default it removes the ASCII control
    # characters and replaces tabs and newlines with spaces.
    # (Used by m2l() so stored fields never contain a literal TAB/newline,
    # which would corrupt the line format.)
    NORM_TABLE = dict(
        [(i, None) for i in range(0, 0x20)], **{
            ord(u'\t'): ord(u' '),
            ord(u'\r'): ord(u' '),
            ord(u'\n'): ord(u' '),
            0x7F: None
        })

    def m2l(self, message):
        """Encode a metadata field list as one tab-separated, UTF-8 line.

        Fields are normalized via NORM_TABLE first, so control characters
        (including tabs/newlines) can never corrupt the line format and
        the line can always be read back later.
        """
        normalized = (unicode(p).translate(self.NORM_TABLE) for p in message)
        return (u'\t'.join(normalized)).encode('utf-8')

    def load(self, session=None):
        """(Re)load the metadata index from disk, resetting in-memory state.

        Handles plaintext and GPG-encrypted index files, performs the
        V1 -> V2 line-format migration, and rebuilds the lookup tables.
        """
        self.INDEX = []
        self.CACHE = {}
        self.PTRS = {}
        self.MSGIDS = {}
        self.EMAILS = []
        self.EMAIL_IDS = {}
        CachedSearchResultSet.DropCaches()

        def process_line(line):
            # One line is either a "#" comment, an "@"-prefixed e-mail
            # address entry, or a tab-separated message metadata line.
            try:
                line = line.strip()
                if line.startswith('#'):
                    pass
                elif line.startswith('@'):
                    pos, email = line[1:].split('\t', 1)
                    pos = int(pos, 36)
                    # Grow EMAILS so direct assignment by position works.
                    while len(self.EMAILS) < pos + 1:
                        self.EMAILS.append('')
                    unquoted_email = unquote(email).decode('utf-8')
                    self.EMAILS[pos] = unquoted_email
                    self.EMAIL_IDS[unquoted_email.split()[0].lower()] = pos
                elif line:
                    words = line.split('\t')

                    # Migration: converting old metadata into new!
                    if len(words) != self.MSG_FIELDS_V2:

                        # V1 -> V2 adds MSG_CC and MSG_KB
                        if len(words) == self.MSG_FIELDS_V1:
                            words[self.MSG_CC:self.MSG_CC] = ['']
                            words[self.MSG_KB:self.MSG_KB] = ['0']

                        # Add V2 -> V3 here, etc. etc.

                        if len(words) == self.MSG_FIELDS_V2:
                            line = '\t'.join(words)
                        else:
                            raise Exception(
                                _('Your metadata index is either '
                                  'too old, too new or corrupt!'))

                    pos = int(words[self.MSG_MID], 36)
                    while len(self.INDEX) < pos + 1:
                        self.INDEX.append('')

                    self.INDEX[pos] = line
                    self.MSGIDS[words[self.MSG_ID]] = pos
                    self.update_msg_tags(pos, words)
                    for msg_ptr in words[self.MSG_PTRS].split(','):
                        self.PTRS[msg_ptr] = pos

            except ValueError:
                # Unparsable lines are silently skipped.
                pass

        if session:
            session.ui.mark(_('Loading metadata index...'))
        try:
            self._lock.acquire()
            fd = open(self.config.mailindex_file(), 'r')
            for line in fd:
                # GPG-encrypted sections are decrypted, then processed
                # line by line like the rest of the file.
                if line.startswith(GPG_BEGIN_MESSAGE):
                    for line in decrypt_gpg([line], fd):
                        process_line(line)
                else:
                    process_line(line)
            fd.close()
        except IOError:
            if session:
                session.ui.warning(
                    _('Metadata index not found: %s') %
                    self.config.mailindex_file())
        finally:
            self._lock.release()

        self.cache_sort_orders(session)
        if session:
            session.ui.mark(
                _('Loaded metadata, %d messages') % len(self.INDEX))
        self.EMAILS_SAVED = len(self.EMAILS)

    def update_msg_tags(self, msg_idx_pos, msg_info):
        """Sync the TAGS reverse-lookup with one message's tag list."""
        current = set(t for t in msg_info[self.MSG_TAGS].split(',') if t)
        # Drop this message from any tags it no longer carries...
        for tid in set(self.TAGS.keys()) - current:
            self.TAGS[tid] -= set([msg_idx_pos])
        # ...and make sure it is a member of every tag it does carry.
        for tid in current:
            self.TAGS.setdefault(tid, set()).add(msg_idx_pos)

    def save_changes(self, session=None):
        """Append only changed/new entries to the index file on disk.

        Falls back to a full save() after MAX_INCREMENTAL_SAVES incremental
        appends, to keep the file from growing without bound.
        """
        # Grab and reset the set of modified index positions up front.
        mods, self.MODIFIED = self.MODIFIED, set()
        if mods or len(self.EMAILS) > self.EMAILS_SAVED:
            if self._saved_changes >= self.MAX_INCREMENTAL_SAVES:
                return self.save(session=session)
            try:
                self._lock.acquire()
                if session:
                    session.ui.mark(_("Saving metadata index changes..."))
                # Append mode: new data goes to the end of the file.
                fd = gpg_open(self.config.mailindex_file(),
                              self.config.prefs.gpg_recipient, 'a')
                # New e-mail addresses first...
                for eid in range(self.EMAILS_SAVED, len(self.EMAILS)):
                    quoted_email = quote(self.EMAILS[eid].encode('utf-8'))
                    fd.write('@%s\t%s\n' % (b36(eid), quoted_email))
                # ...then the modified metadata lines.
                for pos in mods:
                    fd.write(self.INDEX[pos] + '\n')
                fd.close()
                flush_append_cache()
                if session:
                    session.ui.mark(_("Saved metadata index changes"))
                self.EMAILS_SAVED = len(self.EMAILS)
                self._saved_changes += 1
            finally:
                self._lock.release()

    def save(self, session=None):
        """Write the complete metadata index to disk via a temporary file.

        Writes to "<index>.new" first, keeps a few backups of the old file,
        then renames the new file into place.
        """
        try:
            self._lock.acquire()
            self.MODIFIED = set()
            if session:
                session.ui.mark(_("Saving metadata index..."))

            idxfile = self.config.mailindex_file()
            newfile = '%s.new' % idxfile

            fd = gpg_open(newfile, self.config.prefs.gpg_recipient, 'w')
            fd.write('# This is the mailpile.py index file.\n')
            fd.write('# We have %d messages!\n' % len(self.INDEX))
            # E-mail addresses first: "@<base36-id>\t<quoted-address>".
            for eid in range(0, len(self.EMAILS)):
                quoted_email = quote(self.EMAILS[eid].encode('utf-8'))
                fd.write('@%s\t%s\n' % (b36(eid), quoted_email))
            # Then one metadata line per message.
            for item in self.INDEX:
                fd.write(item + '\n')
            fd.close()

            # Keep the last 5 index files around... just in case.
            backup_file(idxfile, backups=5, min_age_delta=10)
            os.rename(newfile, idxfile)

            flush_append_cache()
            self._saved_changes = 0
            if session:
                session.ui.mark(_("Saved metadata index"))
        finally:
            self._lock.release()

    def update_ptrs_and_msgids(self, session):
        """Rebuild the MSGIDS and PTRS lookup tables from the raw INDEX.

        MSGIDS maps hashed Message-IDs to index positions; PTRS maps
        mailbox pointers to index positions.  Lines with the wrong field
        count are reported and skipped.
        """
        session.ui.mark(_('Updating high level indexes'))
        for offset in range(0, len(self.INDEX)):
            message = self.l2m(self.INDEX[offset])
            if len(message) == self.MSG_FIELDS_V2:
                self.MSGIDS[message[self.MSG_ID]] = offset
                for msg_ptr in message[self.MSG_PTRS].split(','):
                    self.PTRS[msg_ptr] = offset
            else:
                # BUG FIX: this previously referenced an undefined name
                # 'line' and raised NameError whenever a bogus line was hit;
                # report the offending raw index line instead.
                session.ui.warning(_('Bogus line: %s') % self.INDEX[offset])

    def try_decode(self, text, charset):
        """Decode text using charset, falling back to latin-1 then UTF-8.

        As a last resort, strip everything that is not plain ASCII.
        """
        candidates = [cs for cs in (charset, 'iso-8859-1', 'utf-8') if cs]
        for cs in candidates:
            try:
                return text.decode(cs)
            except (UnicodeEncodeError, UnicodeDecodeError, LookupError):
                pass
        return "".join(i for i in text if ord(i) < 128)

    def hdr(self, msg, name, value=None):
        """Return a decoded, whitespace-normalized copy of a message header.

        Reads msg[name] unless an explicit value is given.  Returns '' when
        the header cannot be parsed at all.
        """
        try:
            if value is None and msg:
                # Security: RFC822 headers are not allowed to have (unencoded)
                # non-ascii characters in them, so we just strip them all out
                # before parsing.
                # FIXME: This is "safe", but can we be smarter/gentler?
                value = CleanText(msg[name], replace='_').clean
            # Note: decode_header does the wrong thing with "quoted" data.
            decoded = email.header.decode_header((value
                                                  or '').replace('"', ''))
            # Join the decoded fragments, then collapse CR/TAB/LF to spaces.
            return (' '.join([self.try_decode(t[0], t[1]) for t in decoded
                              ])).replace('\r',
                                          ' ').replace('\t',
                                                       ' ').replace('\n', ' ')
        except email.errors.HeaderParseError:
            return ''

    def update_location(self, session, msg_idx_pos, msg_ptr):
        """Record msg_ptr as a known location of the message at msg_idx_pos."""
        msg_info = self.get_msg_at_idx_pos(msg_idx_pos)
        known_ptrs = msg_info[self.MSG_PTRS].split(',')
        self.PTRS[msg_ptr] = msg_idx_pos

        # Pointers share a mailbox-ID prefix; if this mailbox was already
        # known, replace the stale pointer, otherwise (for/else) record the
        # new mailbox sighting as well.
        for n in range(0, len(known_ptrs)):
            if known_ptrs[n][:MBX_ID_LEN] == msg_ptr[:MBX_ID_LEN]:
                known_ptrs[n] = msg_ptr
                break
        else:
            known_ptrs.append(msg_ptr)

        msg_info[self.MSG_PTRS] = ','.join(known_ptrs)
        self.set_msg_at_idx_pos(msg_idx_pos, msg_info)

    def _parse_date(self, date_hdr):
        """Parse a Date: or Received: header into a unix timestamp."""
        try:
            # Received: headers carry the date after the final semicolon.
            if ';' in date_hdr:
                date_hdr = date_hdr.split(';')[-1].strip()
            stamp = long(rfc822.mktime_tz(rfc822.parsedate_tz(date_hdr)))
            # Discard nonsense values and dates over a day in the future.
            if stamp < 1 or stamp > (time.time() + 24 * 3600):
                return None
            return stamp
        except (ValueError, TypeError, OverflowError):
            return None

    def _extract_date_ts(self, session, msg_mid, msg_id, msg, last_date):
        """Extract a date, sanity checking against the Received: headers."""
        hdrs = [self.hdr(msg, 'date')] + (msg.get_all('received') or [])
        parsed = [self._parse_date(h) for h in hdrs]
        date_ts = parsed[0]
        usable = sorted([d for d in parsed if d])

        if not usable:
            # No parseable dates at all: assume the mailbox is roughly
            # chronological and nudge the previous date forward a second.
            session.ui.warning(
                _('=%s/%s has a bogus date') % (msg_mid, msg_id))
            return last_date + 1

        # Trust Date: only if it is within a month of the median Received:
        median = usable[len(usable) / 2]
        if date_ts and abs(date_ts - median) < 31 * 24 * 3600:
            return date_ts
        session.ui.warning(
            _('=%s/%s using Received: instead of Date:') %
            (msg_mid, msg_id))
        return median

    def encode_msg_id(self, msg_id):
        """Hash a raw Message-ID into its compact, URL-safe index form."""
        stripped = msg_id.strip()
        return b64c(sha1b64(stripped))

    def get_msg_id(self, msg, msg_ptr):
        raw_msg_id = self.hdr(msg, 'message-id')
        if not raw_msg_id:
            # Create a very long pseudo-msgid for messages without a
            # Message-ID. This was a very badly behaved mailer, so if
            # we create duplicates this way, we are probably only
            # losing spam. Even then the Received line should save us.
            raw_msg_id = ('\t'.join([
                self.hdr(msg, 'date'),
                self.hdr(msg, 'subject'),
                self.hdr(msg, 'received'),
                self.hdr(msg, 'from'),
                self.hdr(msg, 'to')
            ])).strip()
        # Fall back to the msg_ptr if all else fails.
        if not raw_msg_id:
            print _('WARNING: No proper Message-ID for %s') % msg_ptr
        return self.encode_msg_id(raw_msg_id or msg_ptr)

    def scan_mailbox(self, session, mailbox_idx, mailbox_fn, mailbox_opener):
        try:
            mbox = mailbox_opener(session, mailbox_idx)
            if mbox.editable:
                session.ui.mark(
                    _('%s: Skipped: %s') % (mailbox_idx, mailbox_fn))
                return 0
            else:
                session.ui.mark(
                    _('%s: Checking: %s') % (mailbox_idx, mailbox_fn))
        except (IOError, OSError, NoSuchMailboxError), e:
            session.ui.mark(
                _('%s: Error opening: %s (%s)') % (mailbox_idx, mailbox_fn, e))
            return 0

        unparsed = mbox.unparsed()
        if not unparsed:
            return 0

        if len(self.PTRS.keys()) == 0:
            self.update_ptrs_and_msgids(session)

        snippet_max = session.config.sys.snippet_max
        added = 0
        msg_ts = int(time.time())
        for ui in range(0, len(unparsed)):
            if mailpile.util.QUITTING:
                break

            i = unparsed[ui]
            parse_status = _('%s: Reading your mail: %d%% (%d/%d messages)'
                             ) % (mailbox_idx, 100 * ui / len(unparsed), ui,
                                  len(unparsed))

            msg_ptr = mbox.get_msg_ptr(mailbox_idx, i)
            if msg_ptr in self.PTRS:
                if (ui % 317) == 0:
                    session.ui.mark(parse_status)
                    play_nice_with_threads()
                continue
            else:
                session.ui.mark(parse_status)
                play_nice_with_threads()

            # Message new or modified, let's parse it.
            if 'rescan' in session.config.sys.debug:
                session.ui.debug('Reading message %s/%s' % (mailbox_idx, i))
            try:
                msg_fd = mbox.get_file(i)
                msg = ParseMessage(
                    msg_fd, pgpmime=session.config.prefs.index_encrypted)
            except (IOError, OSError, ValueError, IndexError, KeyError):
                if session.config.sys.debug:
                    traceback.print_exc()
                session.ui.warning(('Reading message %s/%s FAILED, skipping') %
                                   (mailbox_idx, i))
                continue

            msg_size = msg_fd.tell()
            msg_id = self.get_msg_id(msg, msg_ptr)
            if msg_id in self.MSGIDS:
                self.update_location(session, self.MSGIDS[msg_id], msg_ptr)
                added += 1
            else:
                # Add new message!
                msg_mid = b36(len(self.INDEX))

                msg_ts = self._extract_date_ts(session, msg_mid, msg_id, msg,
                                               msg_ts)

                play_nice_with_threads()
                keywords, snippet = self.index_message(
                    session,
                    msg_mid,
                    msg_id,
                    msg,
                    msg_size,
                    msg_ts,
                    mailbox=mailbox_idx,
                    compact=False,
                    filter_hooks=plugins.filter_hooks([self.filter_keywords]))

                msg_subject = self.hdr(msg, 'subject')
                msg_snippet = snippet[:max(0, snippet_max - len(msg_subject))]

                tags = [
                    k.split(':')[0] for k in keywords
                    if k.endswith(':in') or k.endswith(':tag')
                ]

                msg_to = ExtractEmails(self.hdr(msg, 'to'))
                msg_cc = (ExtractEmails(self.hdr(msg, 'cc')) +
                          ExtractEmails(self.hdr(msg, 'bcc')))

                msg_idx_pos, msg_info = self.add_new_msg(
                    msg_ptr, msg_id, msg_ts, self.hdr(msg, 'from'), msg_to,
                    msg_cc, msg_size, msg_subject, msg_snippet, tags)
                self.set_conversation_ids(msg_info[self.MSG_MID], msg)
                mbox.mark_parsed(i)

                added += 1
                GlobalPostingList.Optimize(session,
                                           self,
                                           lazy=True,
                                           quick=True)

        if added:
            mbox.save(session)
        session.ui.mark(
            _('%s: Indexed mailbox: %s') % (mailbox_idx, mailbox_fn))
        return added