def add_maildir(self, maildir_path):
    """Load a maildir and index every mail it contains by its hash.

    Each mail's hash is obtained from ``self.compute_hash``. The
    ``(mail_file, message)`` pair is appended to the list stored under
    that hash in ``self.mails`` and ``self.mail_count`` is incremented.
    Mails for which hashing raises ``InsufficientHeadersError`` are
    reported on stdout and skipped.

    maildir_path: location of an existing maildir; it is never created.
    """
    maildir = Maildir(maildir_path, create=False)

    # Group mails by hash.
    print("Processing {} mails in {}".format(len(maildir), maildir._path))
    for mail_id, message in maildir.iteritems():
        # Build the on-disk path from Maildir's private _path/_lookup().
        mail_file = os.path.join(maildir._path, maildir._lookup(mail_id))
        try:
            # Only the hash is needed; the second returned value is unused.
            mail_hash, _ = self.compute_hash(
                mail_file, message, self.use_message_id)
        except InsufficientHeadersError as e:
            # An exception raised without arguments must not crash the
            # handler itself, so fall back to the exception object.
            reason = e.args[0] if e.args else e
            print("WARNING: ignoring problematic {}: {}".format(
                mail_file, reason))
            continue

        # Emit a progress marker every 100 mails kept so far.
        if self.mail_count > 0 and self.mail_count % 100 == 0:
            print(".")
        self.mails.setdefault(mail_hash, []).append((mail_file, message))
        self.mail_count += 1
def add_maildir(self, maildir_path):
    """Load a maildir and index each mail it contains by its hash key.

    The path is first passed through ``self.canonical_path``. Every mail
    found increments ``self.stats['mail_found']``. Its canonical file path
    is added to the set stored under its hash in ``self.mails`` and
    ``self.stats['mail_kept']`` is incremented. Mails whose ``hash_key``
    raises ``InsufficientHeadersError`` or ``MissingMessageID`` are logged
    as warnings, counted in ``self.stats['mail_rejected']`` and skipped.

    maildir_path: location of an existing maildir; it is never created.
    """
    maildir_path = self.canonical_path(maildir_path)
    # Logging arguments are passed lazily so that records below the active
    # level cost no string formatting.
    logger.info("Opening maildir at %s ...", maildir_path)
    # Maildir parser requires a string, not a unicode, as path.
    maildir = Maildir(str(maildir_path), factory=None, create=False)

    # Group mails by hash.
    mail_total = len(maildir)
    logger.info("%s mails found.", mail_total)

    mail_ids = maildir.iterkeys()
    if self.conf.progress:
        # Wrap the key iterator so that iterating it drives the bar.
        mail_ids = ProgressBar(
            widgets=[Percentage(), Bar()],
            max_value=mail_total,
            redirect_stderr=True,
            redirect_stdout=True,
        )(mail_ids)

    for mail_id in mail_ids:
        self.stats['mail_found'] += 1
        # Build the on-disk path from Maildir's private _path/_lookup().
        mail_path = self.canonical_path(
            os.path.join(maildir._path, maildir._lookup(mail_id)))
        mail = Mail(mail_path, self.conf)
        try:
            mail_hash = mail.hash_key
        except (InsufficientHeadersError, MissingMessageID) as expt:
            # An exception raised without arguments must not crash the
            # handler itself, so fall back to the exception object.
            reason = expt.args[0] if expt.args else expt
            logger.warning("Rejecting %s: %s", mail_path, reason)
            self.stats['mail_rejected'] += 1
            continue

        logger.debug("Hash is %s for mail %r.", mail_hash, mail_id)
        # Use a set to deduplicate entries pointing to the same file.
        self.mails.setdefault(mail_hash, set()).add(mail_path)
        self.stats['mail_kept'] += 1
def add_maildir(self, maildir_path):
    """Load a maildir and index each mail it contains by its hash key.

    The path is first passed through ``self.canonical_path``. Every mail
    found increments ``self.stats['mail_found']``. Its canonical file path
    is added to the set stored under its hash in ``self.mails`` and
    ``self.stats['mail_kept']`` is incremented. Mails whose ``hash_key``
    raises ``InsufficientHeadersError`` or ``MissingMessageID`` are logged
    as warnings, counted in ``self.stats['mail_rejected']`` and skipped.

    maildir_path: location of an existing maildir; it is never created.
    """
    maildir_path = self.canonical_path(maildir_path)
    # Logging arguments are passed lazily so that records below the active
    # level cost no string formatting.
    logger.info("Opening maildir at %s ...", maildir_path)
    # Maildir parser requires a string, not a unicode, as path.
    maildir = Maildir(str(maildir_path), factory=None, create=False)

    # Group mails by hash.
    mail_total = len(maildir)
    logger.info("%s mails found.", mail_total)

    mail_ids = maildir.iterkeys()
    if self.conf.progress:
        # Wrap the key iterator so that iterating it drives the bar.
        mail_ids = ProgressBar(
            widgets=[Percentage(), Bar()],
            max_value=mail_total,
            redirect_stderr=True,
            redirect_stdout=True,
        )(mail_ids)

    for mail_id in mail_ids:
        self.stats['mail_found'] += 1
        # Build the on-disk path from Maildir's private _path/_lookup().
        mail_path = self.canonical_path(
            os.path.join(maildir._path, maildir._lookup(mail_id)))
        mail = Mail(mail_path, self.conf)
        try:
            mail_hash = mail.hash_key
        except (InsufficientHeadersError, MissingMessageID) as expt:
            # An exception raised without arguments must not crash the
            # handler itself, so fall back to the exception object.
            reason = expt.args[0] if expt.args else expt
            logger.warning("Rejecting %s: %s", mail_path, reason)
            self.stats['mail_rejected'] += 1
            continue

        logger.debug("Hash is %s for mail %r.", mail_hash, mail_id)
        # Use a set to deduplicate entries pointing to the same file.
        self.mails.setdefault(mail_hash, set()).add(mail_path)
        self.stats['mail_kept'] += 1