def _split_into_parts(raw):
    """
    Parse a raw message and build the documents derived from it.

    :param raw: the raw message, handed as-is to `_parse_msg`.
    :return: a tuple ``(mdoc, fdoc, hdoc, cdocs)`` with the meta doc, the
             flags doc, the headers doc and a dict holding the content
             docs keyed by their one-based position.
    :rtype: tuple
    """
    # TODO signal that we can delete the original message!-----
    # when all the processing is done.
    # TODO add the linked-from info !
    # TODO add reference to the original message?
    # TODO populate Default FLAGS/TAGS (unseen?)
    # TODO seed properly the content_docs with defaults??
    message, parts, chash, is_multipart = _parse_msg(raw)
    message_size = len(message.as_string())

    body_phash = walk.get_body_phash(message)
    tree = walk.walk_msg_tree(parts, body_phash=body_phash)

    # Materialize the content docs first: they are needed both for the
    # list of phashes and for the indexed dict returned at the end.
    content_docs = list(walk.get_raw_docs(message, parts))
    phashes = []
    for content_doc in content_docs:
        phashes.append(content_doc['phash'])

    return (
        _build_meta_doc(chash, phashes),
        _build_flags_doc(chash, message_size, is_multipart),
        _build_headers_doc(message, chash, body_phash, tree),
        # The MessageWrapper expects a dict, one-indexed
        {position: content_doc
         for position, content_doc in enumerate(content_docs, 1)},
    )
def _hdoc(self): if self._hd: return self._hd hd = {} hd[fields.HEADERS_KEY] = self.headers hd[fields.DATE_KEY] = self.headers['Date'] hd[fields.CONTENT_HASH_KEY] = self._get_chash() hd[fields.MSGID_KEY] = '' hd[fields.MULTIPART_KEY] = True hd[fields.SUBJECT_KEY] = self.headers.get('Subject') hd[fields.TYPE_KEY] = fields.TYPE_HEADERS_VAL hd[fields.BODY_KEY] = self._get_body_phash() hd[fields.PARTS_MAP_KEY] = walk.walk_msg_tree(walk.get_parts(self._mime_multipart), body_phash=self._get_body_phash()) self._hd = hd return hd
def _hdoc(self): if self._hd: return self._hd hd = {} hd[fields.HEADERS_KEY] = self.headers hd[fields.DATE_KEY] = self.headers['Date'] hd[fields.CONTENT_HASH_KEY] = self._get_chash() hd[fields.MSGID_KEY] = '' hd[fields.MULTIPART_KEY] = True hd[fields.SUBJECT_KEY] = self.headers.get('Subject') hd[fields.TYPE_KEY] = fields.TYPE_HEADERS_VAL hd[fields.BODY_KEY] = self._get_body_phash() hd[fields.PARTS_MAP_KEY] = \ walk.walk_msg_tree(walk.get_parts(self._mime_multipart), body_phash=self._get_body_phash())['part_map'] self._hd = hd return hd
def _hdoc(self): if self._hd: return self._hd # InputMail does not have a from header but we need it when persisted into soledad. headers = self.headers.copy() headers['From'] = InputMail.FROM_EMAIL_ADDRESS hd = {} hd[HEADERS_KEY] = headers hd[DATE_KEY] = headers['Date'] hd[CONTENT_HASH_KEY] = self._get_chash() hd[MSGID_KEY] = '' hd[MULTIPART_KEY] = True hd[SUBJECT_KEY] = headers.get('Subject') hd[TYPE_KEY] = fields.HEADERS hd[BODY_KEY] = self._get_body_phash() hd[PARTS_MAP_KEY] = \ walk.walk_msg_tree(walk.get_parts(self._mime_multipart), body_phash=self._get_body_phash())['part_map'] self._hd = hd return hd
def _hdoc(self): if self._hd: return self._hd # InputMail does not have a from header but we need it when persisted into soledad. headers = self.headers.copy() headers['From'] = InputMail.FROM_EMAIL_ADDRESS hd = {} hd[fields.HEADERS_KEY] = headers hd[fields.DATE_KEY] = headers['Date'] hd[fields.CONTENT_HASH_KEY] = self._get_chash() hd[fields.MSGID_KEY] = '' hd[fields.MULTIPART_KEY] = True hd[fields.SUBJECT_KEY] = headers.get('Subject') hd[fields.TYPE_KEY] = fields.TYPE_HEADERS_VAL hd[fields.BODY_KEY] = self._get_body_phash() hd[fields.PARTS_MAP_KEY] = \ walk.walk_msg_tree(walk.get_parts(self._mime_multipart), body_phash=self._get_body_phash())['part_map'] self._hd = hd return hd
DO_CHECK = False ################################################# parts = W.get_parts(msg) if DEBUG: def trim(item): item = item[:10] [trim(part["phash"]) for part in parts if part.get('phash', None)] raw_docs = list(W.get_raw_docs(msg, parts)) body_phash_fun = [W.get_body_phash_simple, W.get_body_phash_multi][int(msg.is_multipart())] body_phash = body_phash_fun(W.get_payloads(msg)) parts_map = W.walk_msg_tree(parts, body_phash=body_phash) # TODO add missing headers! expected = { 'body': '1ddfa80485', 'multi': True, 'part_map': { 1: { 'headers': {'Content-Disposition': 'inline', 'Content-Type': 'multipart/mixed; ' 'boundary="z0eOaCaDLjvTGF2l"'}, 'multi': True, 'part_map': {1: {'ctype': 'text/plain', 'headers': [ ('Content-Type',
def _do_add_msg(self, parse_result, flags, subject, date, notify_on_disk,
                observer):
    """
    Create a new message document out of an already parsed message.

    See `add_msg` docstring for parameter info.

    When `self._fdoc_already_exists` reports a uid for the content hash,
    no new message is created: the observer is fired with that uid, the
    existing message gets ``setFlags((fields.DELETED_FLAG,), -1)`` and the
    method returns.

    :param parse_result: a tuple with the results of `self._do_parse`,
                         unpacked here as (msg, parts, chash, size, multi)
    :type parse_result: tuple
    :param observer: a deferred that will be fired with the message
                     uid when the adding succeed.
    :type observer: deferred
    """
    # TODO signal that we can delete the original message!-----
    # when all the processing is done.

    # TODO add the linked-from info !
    # TODO add reference to the original message

    message, parts, chash, size, multi = parse_result

    # check for uniqueness --------------------------------
    # Watch out! A UID gets reserved right after this check!
    duplicate_uid = self._fdoc_already_exists(chash)
    if duplicate_uid:
        duplicate = self.get_msg_by_uid(duplicate_uid)

        # The observer can be told that we are done.
        self.reactor.callFromThread(observer.callback, duplicate_uid)
        duplicate.setFlags((fields.DELETED_FLAG,), -1)
        return

    uid = self.memstore.increment_last_soledad_uid(self.mbox)

    # The observer could be told that we are done at this point, but
    # before doing that we should make sure it has no serious consequences
    # if we are issued, for instance, a fetch command right after.
    # That early notification is currently disabled; if it gets enabled,
    # the observer must be invalidated (set to None) afterwards so that it
    # is not fired twice.

    flags_doc = self._populate_flags(flags, uid, chash, size, multi)
    headers_doc = self._populate_headr(message, chash, subject, date)

    # Pick the body phash function by indexing with the multipart flag.
    phash_getters = (walk.get_body_phash_simple,
                     walk.get_body_phash_multi)
    compute_body_phash = phash_getters[int(multi)]
    body_phash = compute_body_phash(walk.get_payloads(message))
    tree = walk.walk_msg_tree(parts, body_phash=body_phash)

    # add parts map to header doc
    # (body, multi, part_map)
    for key, value in tree.items():
        headers_doc[key] = value
    del tree

    headers_doc = stringify_parts_map(headers_doc)

    # The MessageContainer expects a dict, one-indexed
    content_docs = {}
    for position, raw_doc in enumerate(
            walk.get_raw_docs(message, parts), 1):
        content_docs[position] = raw_doc

    self.set_recent_flag(uid)
    container = MessageWrapper(flags_doc, headers_doc, content_docs)
    self.memstore.create_message(
        self.mbox, uid, container,
        observer=observer, notify_on_disk=notify_on_disk)
parts = W.get_parts(msg) if DEBUG: def trim(item): item = item[:10] [trim(part["phash"]) for part in parts if part.get('phash', None)] raw_docs = list(W.get_raw_docs(msg, parts)) body_phash_fun = [W.get_body_phash_simple, W.get_body_phash_multi][int(msg.is_multipart())] body_phash = body_phash_fun(W.get_payloads(msg)) parts_map = W.walk_msg_tree(parts, body_phash=body_phash) # TODO add missing headers! expected = { 'body': '1ddfa80485', 'multi': True, 'part_map': { 1: { 'headers': { 'Content-Disposition': 'inline', 'Content-Type': 'multipart/mixed; ' 'boundary="z0eOaCaDLjvTGF2l"' }, 'multi': True, 'part_map': { 1: {