Exemplo n.º 1
0
    def command(self):
        """Export message data as CSV-style rows.

        Parses command-line flags (--timeout=N, --header, --no-mid) and
        column specs, merges in equivalent form data, then emits one row
        per selected message with one cell group per column spec.
        """
        session, config, idx = self.session, self.session.config, self._idx()

        # Command-line arguments...
        msgs = list(self.args)
        timeout = -1
        with_header = False
        without_mid = False
        columns = []
        while msgs and msgs[0].lower() != '--':
            arg = msgs.pop(0)
            if arg.startswith('--timeout='):
                timeout = float(arg[10:])
            elif arg.startswith('--header'):
                with_header = True
            elif arg.startswith('--no-mid'):
                without_mid = True
            else:
                # BUGFIX: append the argument we just popped. The old code
                # did columns.append(msgs.pop(0)), which discarded `arg`,
                # consumed the *next* token as a column, and could raise
                # IndexError when `msgs` was already empty.
                columns.append(arg)
        if msgs and msgs[0].lower() == '--':
            msgs.pop(0)

        # Form arguments override/extend the command-line ones...
        timeout = float(self.data.get('timeout', [timeout])[0])
        with_header |= self._truthy(self.data.get('header', [''])[0])
        without_mid |= self._truthy(self.data.get('no-mid', [''])[0])
        columns.extend(self.data.get('term', []))
        msgs.extend(['=%s' % mid.replace('=', '')
                     for mid in self.data.get('mid', [])])

        # Add a header to the CSV if requested; header cells are the column
        # specs stripped of any '||', ':' or '=' qualifiers.
        if with_header:
            results = [[col.split('||')[0].split(':', 1)[0].split('=', 1)[0]
                        for col in columns]]
            if not without_mid:
                results[0] = ['MID'] + results[0]
        else:
            results = []

        deadline = (time.time() + timeout) if (timeout > 0) else None
        msg_idxs = self._choose_messages(msgs)
        for msg_idx in msg_idxs:
            e = Email(idx, msg_idx)
            session.ui.mark(_('Digging into =%s') % e.msg_mid())
            row = [] if without_mid else ['%s' % e.msg_mid()]
            for cellspec in columns:
                row.extend(self._cell(idx, e, cellspec))
            results.append(row)
            if deadline and deadline < time.time():
                break  # Ran out of time; return partial results.

        return self._success(_('Found %d rows in %d messages'
                               ) % (len(results), len(msg_idxs)), results)
Exemplo n.º 2
0
    def command(self):
        """Rescan vcards, mailboxes, everything, or specific messages.

        With a 'vcards'/'mailboxes' argument only that subsystem is
        rescanned; 'all' is stripped and falls through to a full rescan.
        Explicit message arguments trigger per-message re-indexing.
        """
        session, config, idx = self.session, self.session.config, self._idx()
        args = list(self.args)

        if config.sys.lockdown:
            return self._error(_("In lockdown, doing nothing."))

        delay = play_nice_with_threads()
        if delay > 0:
            session.ui.notify(
                (_("Note: periodic delay is %ss, run from shell to " "speed up: mp --rescan=...")) % delay
            )

        if args and args[0].lower() == "vcards":
            return self._rescan_vcards(session, config)
        elif args and args[0].lower() == "mailboxes":
            return self._rescan_mailboxes(session, config)
        elif args and args[0].lower() == "all":
            args.pop(0)

        msg_idxs = self._choose_messages(args)
        if msg_idxs:
            for msg_idx_pos in msg_idxs:
                e = Email(idx, msg_idx_pos)
                try:
                    session.ui.mark("Re-indexing %s" % e.msg_mid())
                    idx.index_email(self.session, e)
                except KeyboardInterrupt:
                    raise
                except:
                    # Best-effort: log and continue with the next message.
                    self._ignore_exception()
                    session.ui.warning(_("Failed to reindex: %s") % e.msg_mid())
            return self._success(_("Indexed %d messages") % len(msg_idxs), result={"messages": len(msg_idxs)})

        else:
            # FIXME: Need a lock here?
            if "rescan" in config._running:
                return self._success(_("Rescan already in progress"))
            config._running["rescan"] = True
            try:
                results = {}
                results.update(self._rescan_vcards(session, config))
                results.update(self._rescan_mailboxes(session, config))
                if "aborted" in results:
                    raise KeyboardInterrupt()
                return self._success(_("Rescanned vcards and mailboxes"), result=results)
            except KeyboardInterrupt:
                return self._error(_("User aborted"), info=results)
            finally:
                # BUGFIX: the finally clause had no body (truncated source),
                # which is a syntax error and would have left the in-progress
                # flag set forever. Clear it so later rescans can run.
                del config._running["rescan"]
Exemplo n.º 3
0
    def command(self):
        """Re-scan vcards, mailboxes, or an explicit set of messages."""
        session = self.session
        config = session.config
        idx = self._idx()

        delay = play_nice_with_threads()
        if delay > 0:
            session.ui.notify(_("Note: periodic delay is %ss, run from shell to "
                                "speed up: mp --rescan=...") % delay)

        first = self.args[0].lower() if self.args else ""
        if first == "vcards":
            return self._rescan_vcards(session, config)
        if first == "mailboxes":
            return self._rescan_mailboxes(session, config)
        if first == "all":
            self.args.pop(0)

        msg_idxs = self._choose_messages(self.args)
        if not msg_idxs:
            # FIXME: Need a lock here?
            if "rescan" in config._running:
                return True
            config._running["rescan"] = True
            try:
                return dict_merge(self._rescan_vcards(session, config),
                                  self._rescan_mailboxes(session, config))
            finally:
                del config._running["rescan"]

        for pos in msg_idxs:
            email = Email(idx, pos)
            session.ui.mark("Re-indexing %s" % email.msg_mid())
            idx.index_email(self.session, email)
        return {"messages": len(msg_idxs)}
Exemplo n.º 4
0
    def command(self, save=True):
        """Export the selected messages to a newly-created mailbox."""
        session = self.session
        config = session.config
        idx = self._idx()
        mbox_type = config.prefs.export_format

        args = list(self.args)
        if args and ":" in args[-1]:
            mbox_type, path = args.pop(-1).split(":", 1)
        else:
            path = self.export_path(mbox_type)

        if os.path.exists(path):
            return self._error("Already exists: %s" % path)

        msg_idxs = self._choose_messages(args)
        if not msg_idxs:
            session.ui.warning("No messages selected")
            return False

        mbox = self.create_mailbox(mbox_type, path)
        for msg_idx in msg_idxs:
            email = Email(idx, msg_idx)
            session.ui.mark("Exporting =%s ..." % email.msg_mid())
            # FIXME: exporting tags as an X-Mailpile-Tags header doesn't
            # work yet.
            mbox.add(email.get_msg())
        mbox.flush()

        session.ui.mark("Exported %d messages to %s" % (len(msg_idxs), path))
        return {"exported": len(msg_idxs), "created": path}
Exemplo n.º 5
0
    def command(self, save=True):
        """Write the chosen messages out to a fresh mailbox."""
        session, config, idx = self.session, self.session.config, self._idx()
        mbox_type = config.prefs.export_format

        arglist = list(self.args)
        last = arglist[-1] if arglist else ''
        if ':' in last:
            mbox_type, path = arglist.pop(-1).split(':', 1)
        else:
            path = self.export_path(mbox_type)

        if os.path.exists(path):
            return self._error('Already exists: %s' % path)

        selected = self._choose_messages(arglist)
        if not selected:
            session.ui.warning('No messages selected')
            return False

        mailbox = self.create_mailbox(mbox_type, path)
        for midx in selected:
            msg = Email(idx, midx)
            session.ui.mark('Exporting =%s ...' % msg.msg_mid())
            # FIXME: tag export (X-Mailpile-Tags) doesn't work yet
            mailbox.add(msg.get_msg())
        mailbox.flush()

        session.ui.mark('Exported %d messages to %s' % (len(selected), path))
        return {'exported': len(selected), 'created': path}
Exemplo n.º 6
0
    def command(self):
        """Rescan vcards/everything, or re-index specific messages."""
        session = self.session
        config = session.config
        idx = self._idx()

        delay = play_nice_with_threads()
        if delay > 0:
            session.ui.notify(_('Note: periodic delay is %ss, run from shell to '
                                'speed up: mp --rescan=...') % delay)

        first = self.args[0].lower() if self.args else ''
        if first == 'vcards':
            return self._rescan_vcards(session, config)
        if first == 'all':
            self.args.pop(0)

        msg_idxs = self._choose_messages(self.args)
        if not msg_idxs:
            # FIXME: Need a lock here?
            if 'rescan' in config._running:
                return True
            config._running['rescan'] = True
            try:
                return dict_merge(self._rescan_vcards(session, config),
                                  self._rescan_mailboxes(session, config))
            finally:
                del config._running['rescan']

        session.ui.warning(_('FIXME: rescan messages: %s') % msg_idxs)
        for pos in msg_idxs:
            email = Email(idx, pos)
            session.ui.mark('Re-indexing %s' % email.msg_mid())
            idx.index_email(self.session, email)
        return {'messages': len(msg_idxs)}
Exemplo n.º 7
0
    def command(self, save=True):
        """Export selected messages (plus their threads, unless 'flat')."""
        session, config, idx = self.session, self.session.config, self._idx()
        mbox_type = config.prefs.export_format

        if self.session.config.sys.lockdown:
            return self._error(_('In lockdown, doing nothing.'))

        args = list(self.args)
        if args and ':' in args[-1]:
            mbox_type, path = args.pop(-1).split(':', 1)
        else:
            path = self.export_path(mbox_type)

        flat = bool(args) and args[-1] == 'flat'
        if flat:
            args.pop(-1)

        if os.path.exists(path):
            return self._error('Already exists: %s' % path)

        msg_idxs = list(self._choose_messages(args))
        if not msg_idxs:
            session.ui.warning('No messages selected')
            return False

        # Unless 'flat' was requested, expand every selection into its whole
        # conversation: exporting without threads barely makes any sense.
        if not flat:
            for pos in reversed(range(0, len(msg_idxs))):
                thread = idx.get_conversation(msg_idx=msg_idxs[pos])
                msg_idxs[pos:pos + 1] = [int(m[idx.MSG_MID], 36)
                                         for m in thread]

        # Stable output ordering is nice.
        msg_idxs.sort()

        mbox = self.create_mailbox(mbox_type, path)
        exported = {}
        for msg_idx in msg_idxs:
            if msg_idx in exported:
                continue
            email = Email(idx, msg_idx)
            session.ui.mark('Exporting =%s ...' % email.msg_mid())
            mbox.add(email.get_msg())
            exported[msg_idx] = 1

        mbox.flush()

        return self._success(
            _('Exported %d messages to %s') % (len(exported), path),
            {'exported': len(exported), 'created': path})
Exemplo n.º 8
0
    def command(self, save=True):
        """Export messages and (by default) their conversations to a mailbox."""
        session = self.session
        config = session.config
        idx = self._idx()
        mbox_type = config.prefs.export_format

        if self.session.config.sys.lockdown:
            return self._error(_('In lockdown, doing nothing.'))

        arglist = list(self.args)
        if arglist and ':' in arglist[-1]:
            mbox_type, path = arglist.pop(-1).split(':', 1)
        else:
            path = self.export_path(mbox_type)

        flat = False
        if arglist and arglist[-1] == 'flat':
            arglist.pop(-1)
            flat = True

        if os.path.exists(path):
            return self._error('Already exists: %s' % path)

        msg_idxs = list(self._choose_messages(arglist))
        if not msg_idxs:
            session.ui.warning('No messages selected')
            return False

        # Exporting messages without their threads barely makes any sense,
        # so expand every selection into its full conversation.
        if not flat:
            for i in reversed(range(0, len(msg_idxs))):
                convo = idx.get_conversation(msg_idx=msg_idxs[i])
                msg_idxs[i:i + 1] = [int(m[idx.MSG_MID], 36) for m in convo]

        # Always export in the same order; stability is nice.
        msg_idxs.sort()

        mbox = self.create_mailbox(mbox_type, path)
        seen = {}
        while msg_idxs:
            current = msg_idxs.pop(0)
            if current not in seen:
                email = Email(idx, current)
                session.ui.mark('Exporting =%s ...' % email.msg_mid())
                mbox.add(email.get_msg())
                seen[current] = 1

        mbox.flush()

        return self._success(
            _('Exported %d messages to %s') % (len(seen), path),
            {'exported': len(seen), 'created': path})
Exemplo n.º 9
0
    def command(self):
        """Print the selected e-mails to date-stamped HTML files.

        Leading '-' arguments are collected as flags; '-sign' wraps the
        generated HTML in a clearsigned PGP signature before writing.
        Returns a success result listing {mid, filename} per file written.
        """
        session, config, idx = self.session, self.session.config, self._idx()
        args = list(self.args)
        flags = []
        while args and args[0][:1] == '-':
            flags.append(args.pop(0))

        msg_idxs = list(self._choose_messages(args))
        if not msg_idxs:
            return self._error('No messages selected')

        wrote = []
        for msg_idx in msg_idxs:
            e = Email(idx, msg_idx)
            # The index stores the message date base-36 encoded.
            ts = long(e.get_msg_info(field=idx.MSG_DATE), 36)
            dt = datetime.datetime.fromtimestamp(ts)
            subject = e.get_msg_info(field=idx.MSG_SUBJECT)

            # Filename: YYYY-MM-DD.<sanitized subject>.<MID>.html, with
            # non-DNS-safe characters replaced and the subject truncated.
            fn = ('%4.4d-%2.2d-%2.2d.%s.%s.html'
                  % (dt.year, dt.month, dt.day,
                     CleanText(subject,
                               banned=CleanText.NONDNS, replace='_'
                               ).clean.replace('____', '_')[:50],
                     e.msg_mid())
                  ).encode('ascii', 'ignore')

            session.ui.mark(_('Printing e-mail to %s') % fn)
            smv = SingleMessageView(session, arg=['=%s' % e.msg_mid()])
            html = smv.run().as_html()
            if '-sign' in flags:
                key = config.prefs.gpg_recipient
                # Wrap the HTML so the signature armor ends up inside an
                # HTML comment and the printed page still renders.
                html = '<printed ts=%d -->\n%s\n<!-- \n' % (time.time(), html)
                rc, signed = self._gnupg().sign(html.encode('utf-8'),
                                                fromkey=key,
                                                clearsign=True)
                if rc != 0:
                   return self._error('Failed to sign printout')
                html = '<!--\n%s\n-->\n' % signed.decode('utf-8')
            with open(fn, 'wb') as fd:
                fd.write(html.encode('utf-8'))
                wrote.append({'mid': e.msg_mid(), 'filename': fn})

        return self._success(_('Printed to %d files') % len(wrote), wrote)
Exemplo n.º 10
0
    def command(self):
        """Print each selected e-mail to a date-stamped HTML file.

        Flags are consumed from the front of the argument list; '-sign'
        clearsigns the generated HTML with the configured GnuPG key.
        """
        session, config, idx = self.session, self.session.config, self._idx()
        args = list(self.args)
        flags = []
        while args and args[0][:1] == '-':
            flags.append(args.pop(0))

        msg_idxs = list(self._choose_messages(args))
        if not msg_idxs:
            return self._error('No messages selected')

        wrote = []
        for msg_idx in msg_idxs:
            e = Email(idx, msg_idx)
            # Message dates are stored base-36 in the index.
            ts = long(e.get_msg_info(field=idx.MSG_DATE), 36)
            dt = datetime.datetime.fromtimestamp(ts)
            subject = e.get_msg_info(field=idx.MSG_SUBJECT)

            # Build YYYY-MM-DD.<sanitized-subject>.<MID>.html; the subject
            # is scrubbed of non-DNS-safe characters and truncated.
            fn = ('%4.4d-%2.2d-%2.2d.%s.%s.html' %
                  (dt.year, dt.month, dt.day,
                   CleanText(subject, banned=CleanText.NONDNS,
                             replace='_').clean.replace('____', '_')[:50],
                   e.msg_mid())).encode('ascii', 'ignore')

            session.ui.mark(_('Printing e-mail to %s') % fn)
            smv = SingleMessageView(session, arg=['=%s' % e.msg_mid()])
            html = smv.run().as_html()
            if '-sign' in flags:
                key = config.prefs.gpg_recipient
                # Wrap so the signature armor lands inside an HTML comment
                # and the printed page still renders normally.
                html = '<printed ts=%d -->\n%s\n<!-- \n' % (time.time(), html)
                rc, signed = self._gnupg().sign(html.encode('utf-8'),
                                                fromkey=key,
                                                clearsign=True)
                if rc != 0:
                    return self._error('Failed to sign printout')
                html = '<!--\n%s\n-->\n' % signed.decode('utf-8')
            with open(fn, 'wb') as fd:
                fd.write(html.encode('utf-8'))
                wrote.append({'mid': e.msg_mid(), 'filename': fn})

        return self._success(_('Printed to %d files') % len(wrote), wrote)
Exemplo n.º 11
0
    def command(self):
        """Rescan vcards, mailboxes, everything, or specific messages.

        'vcards'/'mailboxes' arguments limit the rescan to that subsystem;
        'all' is stripped and falls through to a full rescan. Explicit
        message arguments trigger per-message re-indexing instead.
        """
        session, config, idx = self.session, self.session.config, self._idx()
        args = list(self.args)

        if config.sys.lockdown:
            return self._error(_('In lockdown, doing nothing.'))

        delay = play_nice_with_threads()
        if delay > 0:
            session.ui.notify((
                _('Note: periodic delay is %ss, run from shell to '
                  'speed up: mp --rescan=...')
            ) % delay)

        if args and args[0].lower() == 'vcards':
            return self._rescan_vcards(session, config)
        elif args and args[0].lower() == 'mailboxes':
            return self._rescan_mailboxes(session, config)
        elif args and args[0].lower() == 'all':
            args.pop(0)

        msg_idxs = self._choose_messages(args)
        if msg_idxs:
            for msg_idx_pos in msg_idxs:
                e = Email(idx, msg_idx_pos)
                session.ui.mark('Re-indexing %s' % e.msg_mid())
                idx.index_email(self.session, e)
            return self._success(_('Indexed %d messages') % len(msg_idxs),
                                 result={'messages': len(msg_idxs)})

        else:
            # FIXME: Need a lock here?
            if 'rescan' in config._running:
                return self._success(_('Rescan already in progress'))
            config._running['rescan'] = True
            try:
                results = {}
                results.update(self._rescan_vcards(session, config))
                results.update(self._rescan_mailboxes(session, config))
                if 'aborted' in results:
                    raise KeyboardInterrupt()
                return self._success(_('Rescanned vcards and mailboxes'),
                                     result=results)
            except KeyboardInterrupt:
                return self._error(_('User aborted'), info=results)
            finally:
                # BUGFIX: the finally clause had no body (truncated source),
                # a syntax error that would also have left the in-progress
                # flag set forever. Clear it so later rescans can run.
                del config._running['rescan']
Exemplo n.º 12
0
    def command(self):
        """Rescan vcards/mailboxes or re-index explicitly chosen messages."""
        session = self.session
        config = session.config
        idx = self._idx()

        if config.sys.lockdown:
            session.ui.warning(_('In lockdown, doing nothing.'))
            return False

        delay = play_nice_with_threads()
        if delay > 0:
            session.ui.notify(_('Note: periodic delay is %ss, run from shell to '
                                'speed up: mp --rescan=...') % delay)

        first = self.args[0].lower() if self.args else ''
        if first == 'vcards':
            return self._rescan_vcards(session, config)
        if first == 'mailboxes':
            return self._rescan_mailboxes(session, config)
        if first == 'all':
            self.args.pop(0)

        msg_idxs = self._choose_messages(self.args)
        if msg_idxs:
            for pos in msg_idxs:
                email = Email(idx, pos)
                session.ui.mark('Re-indexing %s' % email.msg_mid())
                idx.index_email(self.session, email)
            return {'messages': len(msg_idxs)}

        # FIXME: Need a lock here?
        if 'rescan' in config._running:
            return True
        config._running['rescan'] = True
        try:
            return dict_merge(self._rescan_vcards(session, config),
                              self._rescan_mailboxes(session, config))
        finally:
            del config._running['rescan']
Exemplo n.º 13
0
    def _retrain(self, tags=None):
        """Retrain the SpamBayes autotaggers for the given match tags.

        If *tags* is None, every configured autotagger is retrained.
        Builds balanced positive/negative training corpora from tagged,
        read, replied, forwarded and explicitly-excluded messages, then
        resets and re-teaches each classifier and saves the result.
        """
        session, config, idx = self.session, self.session.config, self._idx()
        tags = tags or [asb.match_tag for asb in autotag_configs(config)]
        tids = [config.get_tag(t)._key for t in tags if t]

        session.ui.mark(_('Retraining SpamBayes autotaggers'))
        if not config.real_hasattr('autotag'):
            config.real_setattr('autotag', {})

        # Find all the interesting messages! We don't look in the trash,
        # but we do look at interesting spam.
        #
        # Note: By specifically stating that we DON'T want trash, we
        #       disable the search engine's default result suppression
        #       and guarantee these results don't corrupt the somewhat
        #       lame/broken result cache.
        #
        no_trash = ['-in:%s' % t._key for t in config.get_tags(type='trash')]
        interest = {}
        for ttype in ('replied', 'fwded', 'read', 'tagged'):
            interest[ttype] = set()
            for tag in config.get_tags(type=ttype):
                interest[ttype] |= idx.search(session, ['in:%s' % tag.slug] +
                                              no_trash).as_set()
            session.ui.notify(
                _('Have %d interesting %s messages') %
                (len(interest[ttype]), ttype))

        retrained, unreadable = [], []
        count_all = 0
        for at_config in autotag_configs(config):
            at_tag = config.get_tag(at_config.match_tag)
            if at_tag and at_tag._key in tids:
                session.ui.mark('Retraining: %s' % at_tag.name)

                # One entry per class: (training set, candidate messages,
                # search term, classification flag True/False).
                yn = [(set(), set(), 'in:%s' % at_tag.slug, True),
                      (set(), set(), '-in:%s' % at_tag.slug, False)]

                # Get the current message sets: tagged and untagged messages
                # excluding trash.
                for tset, mset, srch, which in yn:
                    mset |= idx.search(session, [srch] + no_trash).as_set()

                # If we have any exclude_tags, they are particularly
                # interesting, so we'll look at them first.
                interesting = []
                for etagid in at_config.exclude_tags:
                    etag = config.get_tag(etagid)
                    if etag._key not in interest:
                        srch = ['in:%s' % etag._key] + no_trash
                        interest[etag._key] = idx.search(session,
                                                         srch).as_set()
                    interesting.append(etag._key)
                interesting.extend(
                    ['replied', 'fwded', 'read', 'tagged', None])

                # Go through the interest types in order of preference and
                # while we still lack training data, add to the training set.
                for ttype in interesting:
                    for tset, mset, srch, which in yn:
                        # False positives are really annoying, and generally
                        # speaking any autotagged subset should be a small
                        # part of the Universe. So we divide the corpus
                        # budget 33% True, 67% False.
                        full_size = int(at_config.corpus_size *
                                        (0.33 if which else 0.67))
                        want = min(full_size // 4, max(0,
                                                       full_size - len(tset)))
                        if want:
                            if ttype:
                                adding = sorted(list(mset & interest[ttype]))
                            else:
                                adding = sorted(list(mset))
                            adding = set(list(reversed(adding))[:want])
                            tset |= adding
                            mset -= adding

                # Load classifier, reset
                atagger = config.load_auto_tagger(at_config)
                atagger.reset(at_config)
                for tset, mset, srch, which in yn:
                    count = 0
                    # We go through the list of messages in order, to avoid
                    # thrashing caches too badly.
                    for msg_idx in sorted(list(tset)):
                        try:
                            e = Email(idx, msg_idx)
                            count += 1
                            count_all += 1
                            session.ui.mark(
                                _('Reading %s (%d/%d, %s=%s)') %
                                (e.msg_mid(), count, len(tset), at_tag.name,
                                 which))
                            atagger.learn(at_config, e.get_msg(),
                                          self._get_keywords(e), which)
                        except (IndexError, TypeError, ValueError, OSError,
                                IOError):
                            # Unreadable messages are recorded and skipped;
                            # training continues with the rest.
                            if session.config.sys.debug:
                                import traceback
                                traceback.print_exc()
                            unreadable.append(msg_idx)
                            session.ui.warning(
                                _('Failed to process message at =%s') %
                                (b36(msg_idx)))

                # We got this far without crashing, so save the result.
                config.save_auto_tagger(at_config)
                retrained.append(at_tag.name)

        message = _('Retrained SpamBayes auto-tagging for %s') % ', '.join(
            retrained)
        session.ui.mark(message)
        return self._success(message,
                             result={
                                 'retrained': retrained,
                                 'unreadable': unreadable,
                                 'read_messages': count_all
                             })
Exemplo n.º 14
0
    def command(self):
        """Extract CSV-style rows from the selected messages.

        Flags (--timeout=N, --header, --no-mid) and column specs may come
        from the command line or from form data; a 'track-id' form value
        routes progress reporting through the command event instead of
        the session UI.
        """
        session, config, idx = self.session, self.session.config, self._idx()

        # Command-line arguments...
        msgs = list(self.args)
        timeout = -1
        tracking_id = None
        with_header = False
        without_mid = False
        columns = []
        while msgs and msgs[0].lower() != '--':
            arg = msgs.pop(0)
            if arg.startswith('--timeout='):
                timeout = float(arg[10:])
            elif arg.startswith('--header'):
                with_header = True
            elif arg.startswith('--no-mid'):
                without_mid = True
            else:
                columns.append(arg)
        if msgs and msgs[0].lower() == '--':
            msgs.pop(0)

        # Form arguments override/extend the command-line ones...
        timeout = float(self.data.get('timeout', [timeout])[0])
        with_header |= truthy(self.data.get('header', [''])[0])
        without_mid |= truthy(self.data.get('no-mid', [''])[0])
        tracking_id = self.data.get('track-id', [tracking_id])[0]
        columns.extend(self.data.get('term', []))
        msgs.extend(['={0!s}'.format(mid.replace('=', ''))
                     for mid in self.data.get('mid', [])])

        # Add a header to the CSV if requested; header cells are the column
        # specs stripped of any '||', ':' or '=' qualifiers.
        if with_header:
            results = [[col.split('||')[0].split(':', 1)[0].split('=', 1)[0]
                        for col in columns]]
            if not without_mid:
                results[0] = ['MID'] + results[0]
        else:
            results = []

        deadline = (time.time() + timeout) if (timeout > 0) else None
        msg_idxs = self._choose_messages(msgs)
        progress = []
        for msg_idx in msg_idxs:
            e = Email(idx, msg_idx)
            if self.event and tracking_id:
                # Report progress via the event log so callers can poll it
                # using the tracking id.
                progress.append(msg_idx)
                self.event.private_data = {"progress": len(progress),
                                           "track-id": tracking_id,
                                           "total": len(msg_idxs),
                                           "reading": e.msg_mid()}
                self.event.message = _('Digging into =%s') % e.msg_mid()
                self._update_event_state(self.event.RUNNING, log=True)
            else:
                session.ui.mark(_('Digging into =%s') % e.msg_mid())
            row = [] if without_mid else ['{0!s}'.format(e.msg_mid())]
            for cellspec in columns:
                row.extend(self._cell(idx, e, cellspec))
            results.append(row)
            if deadline and deadline < time.time():
                break  # Out of time: return what we have so far.

        return self._success(_('Found %d rows in %d messages'
                               ) % (len(results), len(msg_idxs)), results)
Exemplo n.º 15
0
    def command(self, save=True):
        """Export messages (with threads and tags by default) to a mailbox.

        Options: a trailing 'type:path' argument picks the mailbox type
        and destination; a leading '-flat' skips thread expansion and
        '-notags' omits the X-Mailpile-Tags header from exported messages.
        Unreadable messages are skipped and reported in the result.
        """
        session, config, idx = self.session, self.session.config, self._idx()
        mbox_type = config.prefs.export_format

        args = list(self.args)
        if args and ':' in args[-1]:
            mbox_type, path = args.pop(-1).split(':', 1)
        else:
            path = self.export_path(mbox_type)

        flat = notags = False
        while args and args[0][:1] == '-':
            option = args.pop(0).replace('-', '')
            if option == 'flat':
                flat = True
            elif option == 'notags':
                notags = True

        if os.path.exists(path):
            return self._error('Already exists: %s' % path)

        msg_idxs = list(self._choose_messages(args))
        if not msg_idxs:
            session.ui.warning('No messages selected')
            return False

        # Exporting messages without their threads barely makes any
        # sense, so expand each selection into its full conversation.
        if not flat:
            for i in reversed(range(0, len(msg_idxs))):
                mi = msg_idxs[i]
                msg_idxs[i:i + 1] = [
                    int(m[idx.MSG_MID], 36)
                    for m in idx.get_conversation(msg_idx=mi)
                ]

        # Let's always export in the same order. Stability is nice.
        msg_idxs.sort()

        try:
            mbox = self.create_mailbox(mbox_type, path)
        except (IOError, OSError):
            mbox = None
        if mbox is None:
            # Give the user a hint about the most likely cause.
            if not os.path.exists(os.path.dirname(path)):
                reason = _('Parent directory does not exist.')
            else:
                reason = _('Is the disk full? Are permissions lacking?')
            return self._error(_('Failed to create mailbox: %s') % reason)

        exported = {}
        failed = []
        while msg_idxs:
            msg_idx = msg_idxs.pop(0)
            if msg_idx not in exported:
                e = Email(idx, msg_idx)
                session.ui.mark(_('Exporting message =%s ...') % e.msg_mid())
                fd = e.get_file()
                if fd is None:
                    failed.append(e.msg_mid())
                    session.ui.warning(
                        _('Message =%s is unreadable! Skipping.') %
                        e.msg_mid())
                    continue
                try:
                    data = fd.read()
                    if not notags:
                        # Collect the slugs of this message's tags so they
                        # can be preserved in an X-Mailpile-Tags header.
                        tags = [
                            tag.slug for tag in
                            (self.session.config.get_tag(t) or t
                             for t in e.get_msg_info(idx.MSG_TAGS).split(',')
                             if t) if hasattr(tag, 'slug')
                        ]
                        # Detect the message's own line-ending style and
                        # splice the new header in between headers and body.
                        lf = '\r\n' if ('\r\n' in data[:200]) else '\n'
                        header, body = data.split(lf + lf, 1)
                        data = str(
                            lf.join([
                                header, 'X-Mailpile-Tags: ' +
                                '; '.join(sorted(tags)).encode('utf-8'), '',
                                body
                            ]))
                    mbox.add(data.replace('\r\n', '\n'))
                    exported[msg_idx] = 1
                finally:
                    fd.close()

        mbox.flush()
        result = {'exported': len(exported), 'created': path}
        if failed:
            result['failed'] = failed
        return self._success(
            _('Exported %d messages to %s') % (len(exported), path), result)
Exemplo n.º 16
0
    def command(self, save=True):
        """Export selected messages (and their threads) to a new mailbox.

        An optional trailing "type:path" argument selects the mailbox
        format and destination; otherwise config.prefs.export_format and
        a default export path are used.  A leading "-flat" flag disables
        thread expansion and "-notags" omits the X-Mailpile-Tags header.
        Refuses to overwrite an existing path.  Returns a success result
        with counts (and any unreadable MIDs under 'failed'), an error
        result, or False when no messages were selected.
        """
        session, config, idx = self.session, self.session.config, self._idx()
        mbox_type = config.prefs.export_format

        args = list(self.args)
        # A final "type:path" argument overrides both format and location.
        if args and ':' in args[-1]:
            mbox_type, path = args.pop(-1).split(':', 1)
        else:
            path = self.export_path(mbox_type)

        flat = notags = False
        # Consume leading option flags; dashes are stripped, so both
        # "-flat" and "--flat" are accepted.
        while args and args[0][:1] == '-':
            option = args.pop(0).replace('-', '')
            if option == 'flat':
                flat = True
            elif option == 'notags':
                notags = True

        if os.path.exists(path):
            return self._error('Already exists: %s' % path)

        msg_idxs = list(self._choose_messages(args))
        if not msg_idxs:
            session.ui.warning('No messages selected')
            return False

        # Exporting messages without their threads barely makes any
        # sense.
        if not flat:
            # Splice each message's whole conversation in its place;
            # iterating in reverse keeps unvisited indexes stable.
            for i in reversed(range(0, len(msg_idxs))):
                mi = msg_idxs[i]
                msg_idxs[i:i+1] = [int(m[idx.MSG_MID], 36)
                                   for m in idx.get_conversation(msg_idx=mi)]

        # Let's always export in the same order. Stability is nice.
        msg_idxs.sort()

        try:
            mbox = self.create_mailbox(mbox_type, path)
        except (IOError, OSError):
            mbox = None
        if mbox is None:
            # Try to give the user a useful hint about why creation failed.
            if not os.path.exists(os.path.dirname(path)):
                reason = _('Parent directory does not exist.')
            else:
                reason = _('Is the disk full? Are permissions lacking?')
            return self._error(_('Failed to create mailbox: %s') % reason)

        exported = {}  # msg_idx -> 1; doubles as a de-duplication set
        failed = []    # MIDs of messages that could not be read
        while msg_idxs:
            msg_idx = msg_idxs.pop(0)
            if msg_idx not in exported:
                e = Email(idx, msg_idx)
                session.ui.mark(_('Exporting message =%s ...') % e.msg_mid())
                fd = e.get_file()
                if fd is None:
                    # Unreadable source message: record it and keep going.
                    failed.append(e.msg_mid())
                    session.ui.warning(_('Message =%s is unreadable! Skipping.'
                                         ) % e.msg_mid())
                    continue
                try:
                    data = fd.read()
                    if not notags:
                        # Resolve tag IDs to slugs, silently dropping any
                        # that no longer resolve to a real tag object.
                        tags = [tag.slug for tag in
                                (self.session.config.get_tag(t) or t for t
                                 in e.get_msg_info(idx.MSG_TAGS).split(',')
                                 if t)
                                if hasattr(tag, 'slug')]
                        # Detect the message's own line-ending convention so
                        # the injected header matches it.
                        lf = '\r\n' if ('\r\n' in data[:200]) else '\n'
                        header, body = data.split(lf+lf, 1)
                        data = str(lf.join([
                            header,
                            'X-Mailpile-Tags: ' + '; '.join(sorted(tags)
                                                            ).encode('utf-8'),
                            '',
                            body
                        ]))
                    # Normalize to Unix line endings for the mailbox.
                    mbox.add(data.replace('\r\n', '\n'))
                    exported[msg_idx] = 1
                finally:
                    fd.close()

        mbox.flush()
        result = {
            'exported': len(exported),
            'created': path
        }
        if failed:
            result['failed'] = failed
        return self._success(
            _('Exported %d messages to %s') % (len(exported), path),
            result)
Exemplo n.º 17
0
    def command(self):
        """Retrain the SpamBayes autotaggers for the given (or all) tags.

        Rebuilds each autotagger's training corpus from scratch: messages
        carrying the tagger's match_tag train as positive samples and the
        rest as negative, preferring "interesting" (exclude-tagged, then
        replied/read/tagged) messages as training data.  Returns a dict
        with the retrained tag names and number of messages read.
        """
        session, config, idx = self.session, self.session.config, self._idx()
        # No explicit args means retrain every configured autotagger.
        tags = self.args or [asb.match_tag for asb in config.prefs.autotag]
        tids = [config.get_tag(t)._key for t in tags if t]

        session.ui.mark(_('Retraining SpamBayes autotaggers'))
        if not hasattr(config, 'autotag'):
            config.autotag = {}

        # Find all the interesting messages! We don't look in the trash,
        # but we do look at interesting spam.
        #
        # Note: By specifically stating that we DON'T want trash, we
        #       disable the search engine's default result suppression
        #       and guarantee these results don't corrupt the somewhat
        #       lame/broken result cache.
        #
        no_trash = ['-in:%s' % t._key for t in config.get_tags(type='trash')]
        interest = {}
        for ttype in ('replied', 'read', 'tagged'):
            interest[ttype] = set()
            for tag in config.get_tags(type=ttype):
                interest[ttype] |= idx.search(session,
                                              ['in:%s' % tag.slug] + no_trash
                                              ).as_set()
            session.ui.notify(_('Have %d interesting %s messages'
                                ) % (len(interest[ttype]), ttype))

        retrained = []
        count_all = 0
        for at_config in config.prefs.autotag:
            at_tag = config.get_tag(at_config.match_tag)
            if at_tag and at_tag._key in tids:
                session.ui.mark('Retraining: %s' % at_tag.name)

                # Two training pools: (training set, candidate set, search
                # term, classifier label).  NOTE: the sets are mutated in
                # place (|= / -=) through the loops below, so the tuples in
                # yn always see the updated contents.
                yn = [(set(), set(), 'in:%s' % at_tag.slug, True),
                      (set(), set(), '-in:%s' % at_tag.slug, False)]

                # Get the current message sets: tagged and untagged messages
                # excluding trash.
                for tset, mset, srch, which in yn:
                    mset |= idx.search(session, [srch] + no_trash).as_set()

                # If we have any exclude_tags, they are particularly
                # interesting, so we'll look at them first.
                interesting = []
                for etagid in at_config.exclude_tags:
                    etag = config.get_tag(etagid)
                    if etag._key not in interest:
                        srch = ['in:%s' % etag._key] + no_trash
                        interest[etag._key] = idx.search(session, srch
                                                         ).as_set()
                    interesting.append(etag._key)
                interesting.extend(['replied', 'read', 'tagged', None])

                # Go through the interest types in order of preference and
                # while we still lack training data, add to the training set.
                for ttype in interesting:
                    for tset, mset, srch, which in yn:
                        # FIXME: Is this a good idea? No single data source
                        # is allowed to be more than 50% of the corpus, to
                        # try and encourage diversity.
                        want = min(at_config.corpus_size / 4,
                                   max(0,
                                       at_config.corpus_size / 2 - len(tset)))
                        if want:
                            if ttype:
                                adding = sorted(list(mset & interest[ttype]))
                            else:
                                # Final fallback (ttype is None): take
                                # anything still left in the candidate pool.
                                adding = sorted(list(mset))
                            # Prefer the newest (highest-index) messages.
                            adding = set(list(reversed(adding))[:want])
                            tset |= adding
                            mset -= adding

                # Load classifier, reset
                atagger = config.load_auto_tagger(at_config)
                atagger.reset(at_config)
                for tset, mset, srch, which in yn:
                    count = 0
                    for msg_idx in tset:
                        e = Email(idx, msg_idx)
                        count += 1
                        count_all += 1
                        session.ui.mark(('Reading %s (%d/%d, %s=%s)'
                                         ) % (e.msg_mid(), count, len(tset),
                                              at_tag.name, which))
                        atagger.learn(at_config,
                                      e.get_msg(),
                                      self._get_keywords(e),
                                      which)

                # We got this far without crashing, so save the result.
                config.save_auto_tagger(at_config)
                retrained.append(at_tag.name)

        session.ui.mark(_('Retrained SpamBayes auto-tagging for %s'
                          ) % ', '.join(retrained))
        return {'retrained': retrained, 'read_messages': count_all}
Exemplo n.º 18
0
    def _retrain(self, tags=None):
        """Retrain autotaggers.

        Rebuilds each SpamBayes autotagger's training corpus from scratch:
        messages carrying its match_tag train as True and others as False,
        preferring exclude-tag and replied/read/tagged messages as samples.
        `tags` limits the run to autotaggers whose match_tag is listed
        (default: all configured autotaggers).  Unreadable messages are
        collected rather than aborting the run.
        """
        session, config, idx = self.session, self.session.config, self._idx()
        tags = tags or [asb.match_tag for asb in autotag_configs(config)]
        tids = [config.get_tag(t)._key for t in tags if t]

        session.ui.mark(_('Retraining SpamBayes autotaggers'))
        if not config.real_hasattr('autotag'):
            config.real_setattr('autotag', {})

        # Find all the interesting messages! We don't look in the trash,
        # but we do look at interesting spam.
        #
        # Note: By specifically stating that we DON'T want trash, we
        #       disable the search engine's default result suppression
        #       and guarantee these results don't corrupt the somewhat
        #       lame/broken result cache.
        #
        no_trash = ['-in:%s' % t._key for t in config.get_tags(type='trash')]
        interest = {}
        for ttype in ('replied', 'read', 'tagged'):
            interest[ttype] = set()
            for tag in config.get_tags(type=ttype):
                interest[ttype] |= idx.search(session,
                                              ['in:%s' % tag.slug] + no_trash
                                              ).as_set()
            session.ui.notify(_('Have %d interesting %s messages'
                                ) % (len(interest[ttype]), ttype))

        retrained, unreadable = [], []
        count_all = 0
        for at_config in autotag_configs(config):
            at_tag = config.get_tag(at_config.match_tag)
            if at_tag and at_tag._key in tids:
                session.ui.mark('Retraining: %s' % at_tag.name)

                # Two training pools: (training set, candidate set, search
                # term, classifier label).  NOTE: the sets are mutated in
                # place (|= / -=) through the loops below, so the tuples in
                # yn always see the updated contents.
                yn = [(set(), set(), 'in:%s' % at_tag.slug, True),
                      (set(), set(), '-in:%s' % at_tag.slug, False)]

                # Get the current message sets: tagged and untagged messages
                # excluding trash.
                for tset, mset, srch, which in yn:
                    mset |= idx.search(session, [srch] + no_trash).as_set()

                # If we have any exclude_tags, they are particularly
                # interesting, so we'll look at them first.
                interesting = []
                for etagid in at_config.exclude_tags:
                    etag = config.get_tag(etagid)
                    if etag._key not in interest:
                        srch = ['in:%s' % etag._key] + no_trash
                        interest[etag._key] = idx.search(session, srch
                                                         ).as_set()
                    interesting.append(etag._key)
                interesting.extend(['replied', 'read', 'tagged', None])

                # Go through the interest types in order of preference and
                # while we still lack training data, add to the training set.
                for ttype in interesting:
                    for tset, mset, srch, which in yn:
                        # False positives are really annoying, and generally
                        # speaking any autotagged subset should be a small
                        # part of the Universe. So we divide the corpus
                        # budget 33% True, 67% False.
                        full_size = int(at_config.corpus_size *
                                        (0.33 if which else 0.67))
                        want = min(full_size // len(interesting),
                                   max(0, full_size - len(tset)))
                        # Make sure we always fully utilize our budget
                        if full_size > len(tset) and not ttype:
                            want = full_size - len(tset)

                        if want:
                            if ttype:
                                adding = sorted(list(mset & interest[ttype]))
                            else:
                                # Final fallback (ttype is None): take
                                # anything still left in the candidate pool.
                                adding = sorted(list(mset))
                            # Prefer the newest (highest-index) messages.
                            adding = set(list(reversed(adding))[:want])
                            tset |= adding
                            mset -= adding

                # Load classifier, reset
                atagger = config.load_auto_tagger(at_config)
                atagger.reset(at_config)
                for tset, mset, srch, which in yn:
                    count = 0
                    # We go through the list of message in order, to avoid
                    # thrashing caches too badly.
                    for msg_idx in sorted(list(tset)):
                        try:
                            e = Email(idx, msg_idx)
                            count += 1
                            count_all += 1
                            session.ui.mark(
                                _('Reading %s (%d/%d, %s=%s)'
                                  ) % (e.msg_mid(), count, len(tset),
                                       at_tag.name, which))
                            atagger.learn(at_config,
                                          e.get_msg(),
                                          self._get_keywords(e),
                                          which)
                            # Yield the CPU and honor app shutdown requests.
                            play_nice_with_threads()
                            if mailpile.util.QUITTING:
                                return self._error('Aborted')
                        except (IndexError, TypeError, ValueError,
                                OSError, IOError):
                            # Broken messages shouldn't abort retraining;
                            # record them and carry on.
                            if 'autotag' in session.config.sys.debug:
                                import traceback
                                traceback.print_exc()
                            unreadable.append(msg_idx)
                            session.ui.warning(
                                _('Failed to process message at =%s'
                                  ) % (b36(msg_idx)))

                # We got this far without crashing, so save the result.
                config.save_auto_tagger(at_config)
                retrained.append(at_tag.name)

        message = _('Retrained SpamBayes auto-tagging for %s'
                    ) % ', '.join(retrained)
        session.ui.mark(message)
        return self._success(message, result={
            'retrained': retrained,
            'unreadable': unreadable,
            'read_messages': count_all
        })
Exemplo n.º 19
0
    def command(self):
        """Retrain the SpamBayes autotaggers for the given (or all) tags.

        Variant that also counts forwarded ('fwded') messages as
        interesting training data.  Rebuilds each autotagger's corpus from
        scratch (match_tag messages as positive, the rest as negative) and
        returns a dict with the retrained tag names and read count.
        """
        session, config, idx = self.session, self.session.config, self._idx()
        # No explicit args means retrain every configured autotagger.
        tags = self.args or [asb.match_tag for asb in config.prefs.autotag]
        tids = [config.get_tag(t)._key for t in tags if t]

        session.ui.mark(_('Retraining SpamBayes autotaggers'))
        if not hasattr(config, 'autotag'):
            config.autotag = {}

        # Find all the interesting messages! We don't look in the trash,
        # but we do look at interesting spam.
        #
        # Note: By specifically stating that we DON'T want trash, we
        #       disable the search engine's default result suppression
        #       and guarantee these results don't corrupt the somewhat
        #       lame/broken result cache.
        #
        no_trash = ['-in:%s' % t._key for t in config.get_tags(type='trash')]
        interest = {}
        for ttype in ('replied', 'fwded', 'read', 'tagged'):
            interest[ttype] = set()
            for tag in config.get_tags(type=ttype):
                interest[ttype] |= idx.search(session, ['in:%s' % tag.slug] +
                                              no_trash).as_set()
            session.ui.notify(
                _('Have %d interesting %s messages') %
                (len(interest[ttype]), ttype))

        retrained = []
        count_all = 0
        for at_config in config.prefs.autotag:
            at_tag = config.get_tag(at_config.match_tag)
            if at_tag and at_tag._key in tids:
                session.ui.mark('Retraining: %s' % at_tag.name)

                # Two training pools: (training set, candidate set, search
                # term, classifier label).  NOTE: the sets are mutated in
                # place (|= / -=) through the loops below, so the tuples in
                # yn always see the updated contents.
                yn = [(set(), set(), 'in:%s' % at_tag.slug, True),
                      (set(), set(), '-in:%s' % at_tag.slug, False)]

                # Get the current message sets: tagged and untagged messages
                # excluding trash.
                for tset, mset, srch, which in yn:
                    mset |= idx.search(session, [srch] + no_trash).as_set()

                # If we have any exclude_tags, they are particularly
                # interesting, so we'll look at them first.
                interesting = []
                for etagid in at_config.exclude_tags:
                    etag = config.get_tag(etagid)
                    if etag._key not in interest:
                        srch = ['in:%s' % etag._key] + no_trash
                        interest[etag._key] = idx.search(session,
                                                         srch).as_set()
                    interesting.append(etag._key)
                interesting.extend(
                    ['replied', 'fwded', 'read', 'tagged', None])

                # Go through the interest types in order of preference and
                # while we still lack training data, add to the training set.
                for ttype in interesting:
                    for tset, mset, srch, which in yn:
                        # FIXME: Is this a good idea? No single data source
                        # is allowed to be more than 50% of the corpus, to
                        # try and encourage diversity.
                        want = min(
                            at_config.corpus_size / 4,
                            max(0, at_config.corpus_size / 2 - len(tset)))
                        if want:
                            if ttype:
                                adding = sorted(list(mset & interest[ttype]))
                            else:
                                # Final fallback (ttype is None): take
                                # anything still left in the candidate pool.
                                adding = sorted(list(mset))
                            # Prefer the newest (highest-index) messages.
                            adding = set(list(reversed(adding))[:want])
                            tset |= adding
                            mset -= adding

                # Load classifier, reset
                atagger = config.load_auto_tagger(at_config)
                atagger.reset(at_config)
                for tset, mset, srch, which in yn:
                    count = 0
                    for msg_idx in tset:
                        e = Email(idx, msg_idx)
                        count += 1
                        count_all += 1
                        session.ui.mark(('Reading %s (%d/%d, %s=%s)') %
                                        (e.msg_mid(), count, len(tset),
                                         at_tag.name, which))
                        atagger.learn(at_config, e.get_msg(),
                                      self._get_keywords(e), which)

                # We got this far without crashing, so save the result.
                config.save_auto_tagger(at_config)
                retrained.append(at_tag.name)

        session.ui.mark(
            _('Retrained SpamBayes auto-tagging for %s') %
            ', '.join(retrained))
        return {'retrained': retrained, 'read_messages': count_all}
Exemplo n.º 20
0
    def command(self, save=True):
        """Export selected messages (and their threads) to a new mailbox.

        An optional trailing "type:path" argument selects the mailbox
        format and destination; otherwise config.prefs.export_format and
        a default export path are used.  A leading "-flat" flag disables
        thread expansion and "-notags" omits the X-Mailpile-Tags header.
        Refuses to run in lockdown or to overwrite an existing path.
        Returns a success result with the export count and created path,
        an error result, or False when no messages were selected.
        """
        session, config, idx = self.session, self.session.config, self._idx()
        mbox_type = config.prefs.export_format

        if self.session.config.sys.lockdown:
            return self._error(_('In lockdown, doing nothing.'))

        args = list(self.args)
        # A final "type:path" argument overrides both format and location.
        if args and ':' in args[-1]:
            mbox_type, path = args.pop(-1).split(':', 1)
        else:
            path = self.export_path(mbox_type)

        flat = notags = False
        # Consume leading option flags; dashes are stripped, so both
        # "-flat" and "--flat" are accepted.
        while args and args[0][:1] == '-':
            option = args.pop(0).replace('-', '')
            if option == 'flat':
                flat = True
            elif option == 'notags':
                notags = True

        if os.path.exists(path):
            return self._error('Already exists: %s' % path)

        msg_idxs = list(self._choose_messages(args))
        if not msg_idxs:
            session.ui.warning('No messages selected')
            return False

        # Exporting messages without their threads barely makes any
        # sense.
        if not flat:
            # Splice each message's whole conversation in its place;
            # iterating in reverse keeps unvisited indexes stable.
            for i in reversed(range(0, len(msg_idxs))):
                mi = msg_idxs[i]
                msg_idxs[i:i + 1] = [
                    int(m[idx.MSG_MID], 36)
                    for m in idx.get_conversation(msg_idx=mi)
                ]

        # Let's always export in the same order. Stability is nice.
        msg_idxs.sort()

        mbox = self.create_mailbox(mbox_type, path)
        exported = {}  # msg_idx -> 1; doubles as a de-duplication set
        while msg_idxs:
            msg_idx = msg_idxs.pop(0)
            if msg_idx not in exported:
                e = Email(idx, msg_idx)
                session.ui.mark('Exporting =%s ...' % e.msg_mid())
                fd = e.get_file()
                if fd is None:
                    # Bugfix: e.get_file() returns None for unreadable
                    # messages (see the sibling export implementation);
                    # previously this crashed with AttributeError on
                    # fd.read() and again on fd.close() in the finally.
                    session.ui.warning(_('Message =%s is unreadable! Skipping.'
                                         ) % e.msg_mid())
                    continue
                try:
                    data = fd.read()
                    if not notags:
                        # Resolve tag IDs to slugs, silently dropping any
                        # that no longer resolve to a real tag object.
                        tags = [
                            tag.slug for tag in
                            (self.session.config.get_tag(t) or t
                             for t in e.get_msg_info(idx.MSG_TAGS).split(',')
                             if t) if hasattr(tag, 'slug')
                        ]
                        # Match the message's own line-ending convention so
                        # the injected header fits in.
                        lf = '\r\n' if ('\r\n' in data[:200]) else '\n'
                        header, body = data.split(lf + lf, 1)
                        data = str(
                            lf.join([
                                header, 'X-Mailpile-Tags: ' +
                                '; '.join(sorted(tags)).encode('utf-8'), '',
                                body
                            ]))
                    mbox.add(data)
                    exported[msg_idx] = 1
                finally:
                    fd.close()

        mbox.flush()

        return self._success(
            _('Exported %d messages to %s') % (len(exported), path), {
                'exported': len(exported),
                'created': path
            })
Exemplo n.º 21
0
    def command(self, save=True):
        """Export selected messages (and their threads) to a new mailbox.

        An optional trailing "type:path" argument selects the mailbox
        format and destination; otherwise config.prefs.export_format and
        a default export path are used.  A leading "-flat" flag disables
        thread expansion and "-notags" omits the X-Mailpile-Tags header.
        Refuses to run in lockdown or to overwrite an existing path.
        Returns a success result with the export count and created path,
        an error result, or False when no messages were selected.
        """
        session, config, idx = self.session, self.session.config, self._idx()
        mbox_type = config.prefs.export_format

        if self.session.config.sys.lockdown:
            return self._error(_('In lockdown, doing nothing.'))

        args = list(self.args)
        # A final "type:path" argument overrides both format and location.
        if args and ':' in args[-1]:
            mbox_type, path = args.pop(-1).split(':', 1)
        else:
            path = self.export_path(mbox_type)

        flat = notags = False
        # Consume leading option flags; dashes are stripped, so both
        # "-flat" and "--flat" are accepted.
        while args and args[0][:1] == '-':
            option = args.pop(0).replace('-', '')
            if option == 'flat':
                flat = True
            elif option == 'notags':
                notags = True

        if os.path.exists(path):
            return self._error('Already exists: %s' % path)

        msg_idxs = list(self._choose_messages(args))
        if not msg_idxs:
            session.ui.warning('No messages selected')
            return False

        # Exporting messages without their threads barely makes any
        # sense.
        if not flat:
            # Splice each message's whole conversation in its place;
            # iterating in reverse keeps unvisited indexes stable.
            for i in reversed(range(0, len(msg_idxs))):
                mi = msg_idxs[i]
                msg_idxs[i:i+1] = [int(m[idx.MSG_MID], 36)
                                   for m in idx.get_conversation(msg_idx=mi)]

        # Let's always export in the same order. Stability is nice.
        msg_idxs.sort()

        mbox = self.create_mailbox(mbox_type, path)
        exported = {}  # msg_idx -> 1; doubles as a de-duplication set
        while msg_idxs:
            msg_idx = msg_idxs.pop(0)
            if msg_idx not in exported:
                e = Email(idx, msg_idx)
                session.ui.mark('Exporting =%s ...' % e.msg_mid())
                fd = e.get_file()
                if fd is None:
                    # Bugfix: e.get_file() returns None for unreadable
                    # messages (see the sibling export implementation);
                    # previously this crashed with AttributeError on
                    # fd.read() and again on fd.close() in the finally.
                    session.ui.warning(_('Message =%s is unreadable! Skipping.'
                                         ) % e.msg_mid())
                    continue
                try:
                    data = fd.read()
                    if not notags:
                        # Resolve tag IDs to slugs, silently dropping any
                        # that no longer resolve to a real tag object.
                        tags = [tag.slug for tag in
                                (self.session.config.get_tag(t) or t for t
                                 in e.get_msg_info(idx.MSG_TAGS).split(',')
                                 if t)
                                if hasattr(tag, 'slug')]
                        # Match the message's own line-ending convention so
                        # the injected header fits in.
                        lf = '\r\n' if ('\r\n' in data[:200]) else '\n'
                        header, body = data.split(lf+lf, 1)
                        data = str(lf.join([
                            header,
                            'X-Mailpile-Tags: ' + '; '.join(sorted(tags)
                                                            ).encode('utf-8'),
                            '',
                            body
                        ]))
                    # Normalize to Unix line endings for the mailbox.
                    mbox.add(data.replace('\r\n', '\n'))
                    exported[msg_idx] = 1
                finally:
                    fd.close()

        mbox.flush()

        return self._success(
            _('Exported %d messages to %s') % (len(exported), path),
            {
                'exported': len(exported),
                'created': path
            })
Exemplo n.º 22
0
    def command(self):
        """Dig spreadsheet-style rows of cell data out of messages.

        Column specs and message selectors come from the CLI args (up to
        an optional '--' separator) and/or from form data ('term', 'mid',
        'timeout', 'header', 'no-mid', 'track-id').  Optionally prepends
        a header row, reports per-message progress through the command
        event when a tracking id is given, and stops early once the
        timeout deadline has passed.
        """
        session, config, idx = self.session, self.session.config, self._idx()

        # Parse CLI arguments up to an optional '--' separator.
        remaining = list(self.args)
        timeout = -1
        tracking_id = None
        with_header = without_mid = False
        columns = []
        while remaining:
            arg = remaining[0]
            if arg.lower() == '--':
                break
            remaining.pop(0)
            if arg.startswith('--timeout='):
                timeout = float(arg[10:])
            elif arg.startswith('--header'):
                with_header = True
            elif arg.startswith('--no-mid'):
                without_mid = True
            else:
                columns.append(arg)
        if remaining and remaining[0].lower() == '--':
            remaining.pop(0)

        # Form data overrides or extends the CLI settings.
        timeout = float(self.data.get('timeout', [timeout])[0])
        with_header = with_header or self._truthy(
            self.data.get('header', [''])[0])
        without_mid = without_mid or self._truthy(
            self.data.get('no-mid', [''])[0])
        tracking_id = self.data.get('track-id', [tracking_id])[0]
        columns += self.data.get('term', [])
        remaining += ['=%s' % mid.replace('=', '')
                      for mid in self.data.get('mid', [])]

        # Optionally start with a header row: strip the formatting and
        # slicing suffixes off each column spec to get its bare name.
        results = []
        if with_header:
            header_row = [col.split('||')[0].split(':', 1)[0].split('=', 1)[0]
                          for col in columns]
            if not without_mid:
                header_row.insert(0, 'MID')
            results.append(header_row)

        deadline = None
        if timeout > 0:
            deadline = time.time() + timeout
        msg_idxs = self._choose_messages(remaining)
        progress = []
        for msg_idx in msg_idxs:
            email = Email(idx, msg_idx)
            if self.event and tracking_id:
                # Publish progress via the command's event log so remote
                # callers can poll it using the tracking id.
                progress.append(msg_idx)
                self.event.private_data = {
                    "progress": len(progress),
                    "track-id": tracking_id,
                    "total": len(msg_idxs),
                    "reading": email.msg_mid()
                }
                self.event.message = _('Digging into =%s') % email.msg_mid()
                self._update_event_state(self.event.RUNNING, log=True)
            else:
                session.ui.mark(_('Digging into =%s') % email.msg_mid())
            if without_mid:
                row = []
            else:
                row = ['%s' % email.msg_mid()]
            for cellspec in columns:
                row += self._cell(idx, email, cellspec)
            results.append(row)
            if deadline and time.time() > deadline:
                break

        return self._success(
            _('Found %d rows in %d messages') % (len(results), len(msg_idxs)),
            results)