def process_folders(self):
    """Walk every folder in ``self.message_pack`` and process its files.

    ``self.message_pack`` is iterated as a mapping of folder path to the
    file names found in that folder. For each folder the relative path is
    recorded in ``self.current_relpath``; each file is then handed to
    ``message_generator``. When chunking is enabled (a non-zero
    ``CommonMethods.get_chunksize()``) and ``self.chunks`` equals the chunk
    size, the folder is rendered and reopened and the counter is reset
    before the next file is read. After the last file the folder is
    rendered, and once all folders are done the account is closed and,
    if ``CommonMethods.get_stitch()`` is truthy, stitched.

    :return: None
    """
    for folder, filenames in self.message_pack.items():
        self.current_relpath = self.get_rel_path(folder)

        for filename in filenames:
            # Chunk rollover: only relevant when a chunk size is configured.
            if CommonMethods.get_chunksize() != 0:
                if CommonMethods.get_chunksize() == self.chunks:
                    # Render the folder and reopen
                    self._fldr_render_reopen(folder)
                    self.chunks = 0

            self.cur_fn = filename
            mbox_path = os.path.join(folder, filename)
            self.message_generator(mbox_path)

        # All files of this folder have been read; write the folder out.
        self._fldr_render(folder)

    self.account.close_account()

    stitch_requested = CommonMethods.get_stitch()
    if stitch_requested:
        self.account.stitch_account()
def message_generator(self, path):
    """Extract the individual email messages from the mbox file at ``path``.

    The file is read line by line in binary mode. Every line is passed
    through ``CommonMethods.sanitize`` and collected in a buffer. A line
    matching the mbox "From " separator pattern marks a message boundary:
    the first separator only marks the start of the first message, and
    every later separator hands the buffered lines to
    ``_transform_buffer`` (together with the current file position) before
    a new buffer is started with the separator line itself. At end of file
    whatever is still buffered is handed to ``_transform_buffer``.

    When chunking is enabled (``CommonMethods.get_chunksize()`` is
    non-zero) and ``self.chunks`` equals the chunk size, the folder is
    rendered and reopened and ``self.chunks`` is reset to 0 before the
    buffered message is handed off.

    :type path: str
    :param path: location of the mbox file to read
    :return: None
    """
    # Raw bytes literal: the previous non-raw literal relied on the invalid
    # escape sequences ``\s`` and ``\@``, which newer interpreters warn
    # about. The resulting pattern is unchanged. Compiled once here instead
    # of being looked up again for every line of the file.
    separator = re.compile(rb'^From((\s("|.+).+\@)|(\s(".+")\s))')

    # Becomes True at the first separator and stays True. Resetting it
    # after each hand-off (as was done before) made every second separator
    # look like a fresh start instead of a boundary, so two consecutive
    # messages ended up in one buffer.
    in_message = False
    buff = []

    with open(path, 'rb') as fh:
        # Open the mbox found at path
        while True:
            line = CommonMethods.sanitize(fh.readline())
            if not line:
                # readline() yields an empty result at end of file: write
                # the final message and stop.
                self._transform_buffer(buff, path)
                break

            if separator.search(line):
                # Per RFC
                if in_message:
                    # A new message starts here, so the buffer holds a
                    # complete message: process it.
                    chunk_size = CommonMethods.get_chunksize()
                    if chunk_size != 0 and chunk_size == self.chunks:
                        # Render the folder and reopen
                        self._fldr_render_reopen(path)
                        self.chunks = 0
                    self._transform_buffer(buff, path, fh.tell())
                    buff = []
                else:
                    # Found the beginning of the first message; everything
                    # up to the next 'From ' block goes into the buffer.
                    in_message = True

            buff.append(line)
def __init__(self, root_level, xml_dir, account_name):
    """Initialise the walker's state and create its ``Account``.

    :param root_level: stored unchanged as ``self.root``
    :param xml_dir: stored as ``self.xml_dir`` and passed to ``Account``
    :param account_name: passed to ``Account`` as the account name
    """
    self.mbx = None  # type: mailbox.mbox
    self.root = root_level
    self.folders = {}
    self.messages = []
    self.current_relpath = None  # type: str
    self.xml_dir = xml_dir
    self.account = Account(account_name, xml_dir)
    self.logger = logging.getLogger("MboxWalker")
    self.total_messages_processed = 0  # type: int
    # Starts at the configured chunk size (0 when chunking is disabled).
    self.chunks = CommonMethods.get_chunksize()  # type: int
    self.tracking_pos = 0  # type: int
    self.messages_in_folder = 0  # type: int
    self.messages_no_start_fldr = 0  # type: int
    self.message_no_end_flder = 0  # type: int
    self.new_account = True
    self.mboxes = []  # type: list
    self.new_folder = False
    # Raw string literal: the previous non-raw literal relied on the
    # invalid escape sequences ``\s`` and ``\@``, which newer interpreters
    # warn about. The compiled pattern text is identical.
    # NOTE(review): this is a str pattern, so it can only be applied to
    # str input, not to lines read from a file opened in binary mode.
    self.mesg_begin = re.compile(r'^From((\s("|.+).+\@)|(\s(".+")\s))')
    self.json_folders = []
    # NOTE(review): json_write is only defined when JSON storage is
    # enabled; readers of this attribute must check the same setting.
    if CommonMethods.get_store_json():
        self.json_write = CommonMethods.get_json_directory()
def start_account(self):
    """Begin writing the account output.

    With chunking enabled (``CommonMethods.get_chunksize()`` is non-zero)
    the work is delegated to ``_start_account_chunks``. Otherwise the
    output file path is built from ``self.xml_loc`` and ``self.xml_name``,
    stored in ``self.current_eaxs_file``, and ``_write_file`` is called.

    :return: None
    """
    chunking_enabled = CommonMethods.get_chunksize() != 0

    if chunking_enabled:
        self._start_account_chunks()
    else:
        target = os.path.join(self.xml_loc, self.xml_name)
        self.current_eaxs_file = target
        self._write_file()