Пример #1
0
    def add_maildir(self, maildir_path):
        """Load a maildir and index every mail it contains by hash.

        Each message is handed to ``self.compute_hash`` together with its
        file path and ``self.use_message_id``. The returned hash is the key
        into ``self.mails``, whose values are lists of
        ``(mail_file, message)`` tuples. ``self.mail_count`` is incremented
        once for every mail that was indexed.

        Mails for which ``compute_hash`` raises ``InsufficientHeadersError``
        are reported with a warning on stdout and skipped.

        Args:
            maildir_path: Path of an existing maildir (opened with
                ``create=False``).
        """
        maildir = Maildir(maildir_path, create=False)
        # Collate mails by hash.
        print("Processing {} mails in {}".format(len(maildir), maildir._path))
        for mail_id, message in maildir.iteritems():
            mail_file = os.path.join(maildir._path, maildir._lookup(mail_id))
            try:
                # Only the hash is used here; the header text is discarded.
                mail_hash, _ = self.compute_hash(
                    mail_file, message, self.use_message_id)
            except InsufficientHeadersError as e:
                # The exception may have been raised without arguments, in
                # which case e.args[0] would itself fail and abort the scan.
                reason = e.args[0] if e.args else type(e).__name__
                print("WARNING: ignoring problematic {}: {}".format(
                    mail_file, reason))
            else:
                # Emit a progress marker whenever the number of mails indexed
                # so far is a non-zero multiple of 100.
                if self.mail_count > 0 and self.mail_count % 100 == 0:
                    print(".")
                self.mails.setdefault(mail_hash, []).append(
                    (mail_file, message))
                self.mail_count += 1
    def add_maildir(self, maildir_path):
        """Load a maildir and index every mail it contains by hash.

        Each message is handed to ``self.compute_hash`` together with its
        file path and ``self.use_message_id``. The returned hash is the key
        into ``self.mails``, whose values are lists of
        ``(mail_file, message)`` tuples. ``self.mail_count`` is incremented
        once for every mail that was indexed.

        Mails for which ``compute_hash`` raises ``InsufficientHeadersError``
        are reported with a warning on stdout and skipped.

        Args:
            maildir_path: Path of an existing maildir (opened with
                ``create=False``).
        """
        maildir = Maildir(maildir_path, create=False)
        # Collate mails by hash.
        print("Processing {} mails in {}".format(len(maildir), maildir._path))
        for mail_id, message in maildir.iteritems():
            mail_file = os.path.join(maildir._path, maildir._lookup(mail_id))
            try:
                # Only the hash is used here; the header text is discarded.
                mail_hash, _ = self.compute_hash(
                    mail_file, message, self.use_message_id)
            except InsufficientHeadersError as e:
                # The exception may have been raised without arguments, in
                # which case e.args[0] would itself fail and abort the scan.
                reason = e.args[0] if e.args else type(e).__name__
                print("WARNING: ignoring problematic {}: {}".format(
                    mail_file, reason))
            else:
                # Emit a progress marker whenever the number of mails indexed
                # so far is a non-zero multiple of 100.
                if self.mail_count > 0 and self.mail_count % 100 == 0:
                    print(".")
                self.mails.setdefault(mail_hash, []).append(
                    (mail_file, message))
                self.mail_count += 1
Пример #3
0
    def add_maildir(self, maildir_path):
        """Load a maildir and index every mail found in it by hash.

        The path is normalised with ``self.canonical_path`` before the
        maildir is opened. For every key in the maildir a ``Mail`` object is
        built from the mail's canonical file path and ``self.conf``, and its
        ``hash_key`` is used as the key into ``self.mails``; the values are
        sets of mail paths.

        Counters in ``self.stats`` are updated as the scan proceeds:
        ``mail_found`` for every mail seen, ``mail_rejected`` when reading
        ``hash_key`` raises ``InsufficientHeadersError`` or
        ``MissingMessageID``, and ``mail_kept`` otherwise.

        Args:
            maildir_path: Path of an existing maildir (opened with
                ``create=False``).
        """
        maildir_path = self.canonical_path(maildir_path)
        logger.info("Opening maildir at {} ...".format(maildir_path))
        # Maildir parser requires a string, not a unicode, as path.
        maildir = Maildir(str(maildir_path), factory=None, create=False)

        # Group mails by hash.
        mail_total = len(maildir)
        logger.info("{} mails found.".format(mail_total))
        if self.conf.progress:
            bar = ProgressBar(widgets=[Percentage(), Bar()],
                              max_value=mail_total,
                              redirect_stderr=True,
                              redirect_stdout=True)
        else:

            # No progress display requested: iterate the keys unchanged.
            def bar(x):
                return x

        for mail_id in bar(maildir.iterkeys()):
            self.stats['mail_found'] += 1

            mail_path = self.canonical_path(
                os.path.join(maildir._path, maildir._lookup(mail_id)))
            mail = Mail(mail_path, self.conf)

            try:
                mail_hash = mail.hash_key
            except (InsufficientHeadersError, MissingMessageID) as expt:
                # Either exception may have been raised without arguments, in
                # which case expt.args[0] would itself fail and abort the
                # scan; fall back to the exception's class name.
                reason = expt.args[0] if expt.args else type(expt).__name__
                logger.warning("Rejecting {}: {}".format(mail_path, reason))
                self.stats['mail_rejected'] += 1
            else:
                logger.debug("Hash is {} for mail {!r}.".format(
                    mail_hash, mail_id))
                # Use a set to deduplicate entries pointing to the same file.
                self.mails.setdefault(mail_hash, set()).add(mail_path)
                self.stats['mail_kept'] += 1
Пример #4
0
    def add_maildir(self, maildir_path):
        """Load a maildir and index every mail found in it by hash.

        The path is normalised with ``self.canonical_path`` before the
        maildir is opened. For every key in the maildir a ``Mail`` object is
        built from the mail's canonical file path and ``self.conf``, and its
        ``hash_key`` is used as the key into ``self.mails``; the values are
        sets of mail paths.

        Counters in ``self.stats`` are updated as the scan proceeds:
        ``mail_found`` for every mail seen, ``mail_rejected`` when reading
        ``hash_key`` raises ``InsufficientHeadersError`` or
        ``MissingMessageID``, and ``mail_kept`` otherwise.

        Args:
            maildir_path: Path of an existing maildir (opened with
                ``create=False``).
        """
        maildir_path = self.canonical_path(maildir_path)
        logger.info("Opening maildir at {} ...".format(maildir_path))
        # Maildir parser requires a string, not a unicode, as path.
        maildir = Maildir(str(maildir_path), factory=None, create=False)

        # Group mails by hash.
        mail_total = len(maildir)
        logger.info("{} mails found.".format(mail_total))
        if self.conf.progress:
            bar = ProgressBar(widgets=[Percentage(), Bar()],
                              max_value=mail_total, redirect_stderr=True,
                              redirect_stdout=True)
        else:
            # No progress display requested: iterate the keys unchanged.
            def bar(x):
                return x

        for mail_id in bar(maildir.iterkeys()):
            self.stats['mail_found'] += 1

            mail_path = self.canonical_path(os.path.join(
                maildir._path, maildir._lookup(mail_id)))
            mail = Mail(mail_path, self.conf)

            try:
                mail_hash = mail.hash_key
            except (InsufficientHeadersError, MissingMessageID) as expt:
                # Either exception may have been raised without arguments, in
                # which case expt.args[0] would itself fail and abort the
                # scan; fall back to the exception's class name.
                reason = expt.args[0] if expt.args else type(expt).__name__
                logger.warning(
                    "Rejecting {}: {}".format(mail_path, reason))
                self.stats['mail_rejected'] += 1
            else:
                logger.debug(
                    "Hash is {} for mail {!r}.".format(mail_hash, mail_id))
                # Use a set to deduplicate entries pointing to the same file.
                self.mails.setdefault(mail_hash, set()).add(mail_path)
                self.stats['mail_kept'] += 1