def _split_into_parts(raw):
    """
    Break a raw message into the documents derived from it.

    The message is parsed with ``_parse_msg`` and inspected through the
    ``walk`` helpers; the results are fed to the ``_build_*_doc``
    builders.

    :param raw: the raw message, in whatever form ``_parse_msg`` accepts.
    :return: a 4-tuple ``(mdoc, fdoc, hdoc, cdocs)`` holding the meta
             document, the flags document, the headers document and a
             dict of content documents keyed by one-based position.
    """
    # TODO signal that we can delete the original message
    #      when all the processing is done.
    # TODO add the linked-from info!
    # TODO add reference to the original message?
    # TODO populate default FLAGS/TAGS (unseen?)
    # TODO seed properly the content_docs with defaults??

    parsed, content_hash, is_multipart = _parse_msg(raw)
    # Size is measured on the serialized form of the parsed message.
    msg_size = len(parsed.as_string())

    tree = walk.get_tree(parsed)
    content_docs = list(walk.get_raw_docs(parsed))
    part_hashes = [doc['phash'] for doc in content_docs]
    body_hash = walk.get_body_phash(parsed)

    meta_doc = _build_meta_doc(content_hash, part_hashes)
    flags_doc = _build_flags_doc(content_hash, msg_size, is_multipart)
    headers_doc = _build_headers_doc(parsed, content_hash, body_hash, tree)

    # The MessageWrapper expects a dict whose keys start at 1, not 0.
    indexed_docs = {
        position: doc for position, doc in enumerate(content_docs, 1)
    }

    return meta_doc, flags_doc, headers_doc, indexed_docs
FILENAME = "rfc822.plain.message" FILENAME = "rfc822.multi-minimal.message" """ msg = p.parse(open(FILENAME)) DO_CHECK = False ################################################# parts = W.get_parts(msg) if DEBUG: def trim(item): item = item[:10] [trim(part["phash"]) for part in parts if part.get('phash', None)] raw_docs = list(W.get_raw_docs(msg, parts)) body_phash_fun = [W.get_body_phash_simple, W.get_body_phash_multi][int(msg.is_multipart())] body_phash = body_phash_fun(W.get_payloads(msg)) parts_map = W.walk_msg_tree(parts, body_phash=body_phash) # TODO add missing headers! expected = { 'body': '1ddfa80485', 'multi': True, 'part_map': { 1: { 'headers': {'Content-Disposition': 'inline', 'Content-Type': 'multipart/mixed; '