def _hdoc(self):
    """
    Build the headers document for this mail, caching it on first use.

    The document is a plain dict keyed by the constants in ``fields``.
    Once built it is stored in ``self._hd`` and returned as-is by every
    later call.

    The ``Date`` header is read by subscript, so a mail without it makes
    the lookup fail; ``Subject`` is read with ``get`` and may be ``None``.

    :return: the headers document
    :rtype: dict
    """
    if self._hd:
        return self._hd

    hd = {}
    hd[fields.HEADERS_KEY] = self.headers
    hd[fields.DATE_KEY] = self.headers['Date']
    hd[fields.CONTENT_HASH_KEY] = self._get_chash()
    hd[fields.MSGID_KEY] = ''
    hd[fields.MULTIPART_KEY] = True
    hd[fields.SUBJECT_KEY] = self.headers.get('Subject')
    hd[fields.TYPE_KEY] = fields.TYPE_HEADERS_VAL

    # The body hash is needed both as a field and as input to the tree
    # walker; compute it once instead of hashing the body twice.
    body_phash = self._get_body_phash()
    hd[fields.BODY_KEY] = body_phash

    # NOTE(review): the whole walk_msg_tree() result is stored here, not a
    # 'part_map' entry taken from it -- confirm this is what readers of
    # PARTS_MAP_KEY expect.
    hd[fields.PARTS_MAP_KEY] = walk.walk_msg_tree(
        walk.get_parts(self._mime_multipart), body_phash=body_phash)

    self._hd = hd
    return hd
Пример #2
0
    def _hdoc(self):
        """
        Build the headers document for this mail, caching it on first use.

        The document is a plain dict keyed by the constants in ``fields``.
        Once built it is stored in ``self._hd`` and returned as-is by every
        later call.

        The ``Date`` header is read by subscript, so a mail without it makes
        the lookup fail; ``Subject`` is read with ``get`` and may be ``None``.

        :return: the headers document
        :rtype: dict
        """
        if self._hd:
            return self._hd

        hd = {}
        hd[fields.HEADERS_KEY] = self.headers
        hd[fields.DATE_KEY] = self.headers['Date']
        hd[fields.CONTENT_HASH_KEY] = self._get_chash()
        hd[fields.MSGID_KEY] = ''
        hd[fields.MULTIPART_KEY] = True
        hd[fields.SUBJECT_KEY] = self.headers.get('Subject')
        hd[fields.TYPE_KEY] = fields.TYPE_HEADERS_VAL

        # The body hash is needed both as a field and as input to the tree
        # walker; compute it once instead of hashing the body twice.
        body_phash = self._get_body_phash()
        hd[fields.BODY_KEY] = body_phash

        # Only the 'part_map' entry of the walker's result is persisted.
        msg_tree = walk.walk_msg_tree(
            walk.get_parts(self._mime_multipart), body_phash=body_phash)
        hd[fields.PARTS_MAP_KEY] = msg_tree['part_map']

        self._hd = hd
        return hd
Пример #3
0
    def _do_parse(self, raw):
        """
        Parse raw message and return it along with
        relevant information about its outer level.

        This is done in a separate thread, and the callback is passed
        to `_do_add_msg` method.

        :param raw: the raw message
        :type raw: StringIO or basestring
        :return: msg, parts, chash, size, multi
        :rtype: tuple
        """
        # The documented contract accepts StringIO objects, but parsing,
        # len() and hashing below all need the actual string. Unwrap any
        # object exposing getvalue(); plain strings pass through untouched.
        getvalue = getattr(raw, 'getvalue', None)
        if getvalue is not None:
            raw = getvalue()

        msg = message_from_string(raw)
        parts = walk.get_parts(msg)
        # Size is the length of the raw string, not of the parsed message.
        size = len(raw)
        # Content hash: hex SHA256 digest of the raw message.
        chash = sha256.SHA256(raw).hexdigest()
        multi = msg.is_multipart()
        return msg, parts, chash, size, multi
Пример #4
0
    def _hdoc(self):
        """
        Build the headers document for this mail, caching it on first use.

        Works on a copy of ``self.headers`` with a ``From`` entry set to
        ``InputMail.FROM_EMAIL_ADDRESS``; ``self.headers`` itself is not
        modified. Once built, the document is stored in ``self._hd`` and
        returned as-is by every later call.

        The ``Date`` header is read by subscript, so a mail without it makes
        the lookup fail; ``Subject`` is read with ``get`` and may be ``None``.

        :return: the headers document
        :rtype: dict
        """
        if self._hd:
            return self._hd

        # InputMail does not have a from header but we need it when persisted into soledad.
        headers = self.headers.copy()
        headers['From'] = InputMail.FROM_EMAIL_ADDRESS

        hd = {}
        hd[HEADERS_KEY] = headers
        hd[DATE_KEY] = headers['Date']
        hd[CONTENT_HASH_KEY] = self._get_chash()
        hd[MSGID_KEY] = ''
        hd[MULTIPART_KEY] = True
        hd[SUBJECT_KEY] = headers.get('Subject')
        hd[TYPE_KEY] = fields.HEADERS

        # The body hash is needed both as a field and as input to the tree
        # walker; compute it once instead of hashing the body twice.
        body_phash = self._get_body_phash()
        hd[BODY_KEY] = body_phash

        # Only the 'part_map' entry of the walker's result is persisted.
        msg_tree = walk.walk_msg_tree(
            walk.get_parts(self._mime_multipart), body_phash=body_phash)
        hd[PARTS_MAP_KEY] = msg_tree['part_map']

        self._hd = hd
        return hd
Пример #5
0
    def _hdoc(self):
        """
        Build the headers document for this mail, caching it on first use.

        Works on a copy of ``self.headers`` with a ``From`` entry set to
        ``InputMail.FROM_EMAIL_ADDRESS``; ``self.headers`` itself is not
        modified. Once built, the document is stored in ``self._hd`` and
        returned as-is by every later call.

        The ``Date`` header is read by subscript, so a mail without it makes
        the lookup fail; ``Subject`` is read with ``get`` and may be ``None``.

        :return: the headers document
        :rtype: dict
        """
        if self._hd:
            return self._hd

        # InputMail does not have a from header but we need it when persisted into soledad.
        headers = self.headers.copy()
        headers['From'] = InputMail.FROM_EMAIL_ADDRESS

        hd = {}
        hd[fields.HEADERS_KEY] = headers
        hd[fields.DATE_KEY] = headers['Date']
        hd[fields.CONTENT_HASH_KEY] = self._get_chash()
        hd[fields.MSGID_KEY] = ''
        hd[fields.MULTIPART_KEY] = True
        hd[fields.SUBJECT_KEY] = headers.get('Subject')
        hd[fields.TYPE_KEY] = fields.TYPE_HEADERS_VAL

        # The body hash is needed both as a field and as input to the tree
        # walker; compute it once instead of hashing the body twice.
        body_phash = self._get_body_phash()
        hd[fields.BODY_KEY] = body_phash

        # Only the 'part_map' entry of the walker's result is persisted.
        msg_tree = walk.walk_msg_tree(
            walk.get_parts(self._mime_multipart), body_phash=body_phash)
        hd[fields.PARTS_MAP_KEY] = msg_tree['part_map']

        self._hd = hd
        return hd
Пример #6
0
# Message fixture to load: first command-line argument if given, otherwise
# the multipart signed sample.
if len(sys.argv) > 1:
    FILENAME = sys.argv[1]
else:
    FILENAME = "rfc822.multi-signed.message"

# Other fixtures that can be swapped in by hand:
#   FILENAME = "rfc822.plain.message"
#   FILENAME = "rfc822.multi-minimal.message"

# Parse inside a context manager so the file handle is closed as soon as the
# message has been read, instead of being left for the garbage collector.
# Assumes p.parse() consumes the file fully before returning, as
# email.parser.Parser.parse does -- confirm against how `p` is built.
with open(FILENAME) as message_file:
    msg = p.parse(message_file)
DO_CHECK = False
#################################################

parts = W.get_parts(msg)

if DEBUG:
    def trim(item):
        # NOTE(review): this only rebinds the local name, so the caller's
        # value is never shortened. Kept as-is because later steps hash and
        # map the untrimmed values; confirm whether real trimming is wanted.
        item = item[:10]

    # Plain loop: the calls are made for their (intended) side effect only.
    for part in parts:
        if part.get('phash', None):
            trim(part["phash"])

raw_docs = list(W.get_raw_docs(msg, parts))

# Multipart messages use a different body-hash function than simple ones.
if msg.is_multipart():
    body_phash_fun = W.get_body_phash_multi
else:
    body_phash_fun = W.get_body_phash_simple
body_phash = body_phash_fun(W.get_payloads(msg))
parts_map = W.walk_msg_tree(parts, body_phash=body_phash)


# TODO add missing headers!
Пример #7
0
def _parse_msg(raw):
    """
    Parse a raw message and gather its outer-level information.

    :param raw: the raw message, as a string
    :return: a 4-tuple ``(msg, parts, chash, multi)``: the parsed message,
             the result of ``walk.get_parts`` for it, the hex SHA256
             digest of ``raw``, and whether the message is multipart
    :rtype: tuple
    """
    parsed = message_from_string(raw)
    # Tuple items are evaluated left to right, matching the original order.
    return (
        parsed,
        walk.get_parts(parsed),
        sha256.SHA256(raw).hexdigest(),
        parsed.is_multipart(),
    )
Пример #8
0
# Input from hell

# Message fixture to load: first command-line argument if given, otherwise
# the multipart signed sample.
if len(sys.argv) > 1:
    FILENAME = sys.argv[1]
else:
    FILENAME = "rfc822.multi-signed.message"

# Other fixtures that can be swapped in by hand:
#   FILENAME = "rfc822.plain.message"
#   FILENAME = "rfc822.multi-minimal.message"

# Parse inside a context manager so the file handle is closed as soon as the
# message has been read, instead of being left for the garbage collector.
# Assumes p.parse() consumes the file fully before returning, as
# email.parser.Parser.parse does -- confirm against how `p` is built.
with open(FILENAME) as message_file:
    msg = p.parse(message_file)
DO_CHECK = False
#################################################

parts = W.get_parts(msg)

if DEBUG:

    def trim(item):
        # NOTE(review): this only rebinds the local name, so the caller's
        # value is never shortened. Kept as-is because later steps hash and
        # map the untrimmed values; confirm whether real trimming is wanted.
        item = item[:10]

    # Plain loop: the calls are made for their (intended) side effect only.
    for part in parts:
        if part.get('phash', None):
            trim(part["phash"])

raw_docs = list(W.get_raw_docs(msg, parts))

# Multipart messages use a different body-hash function than simple ones.
if msg.is_multipart():
    body_phash_fun = W.get_body_phash_multi
else:
    body_phash_fun = W.get_body_phash_simple
body_phash = body_phash_fun(W.get_payloads(msg))
parts_map = W.walk_msg_tree(parts, body_phash=body_phash)
Пример #9
0
def _parse_msg(raw):
    """
    Parse a raw message and gather its outer-level information.

    :param raw: the raw message, as a string
    :return: a 4-tuple ``(msg, parts, chash, multi)``: the parsed message,
             the result of ``walk.get_parts`` for it, the hex SHA256
             digest of ``raw``, and whether the message is multipart
    :rtype: tuple
    """
    parsed = message_from_string(raw)
    msg_parts = walk.get_parts(parsed)
    content_hash = sha256.SHA256(raw).hexdigest()
    return parsed, msg_parts, content_hash, parsed.is_multipart()
Пример #10
0
def _parse_msg(raw):
    """
    Parse a raw message and gather its outer-level information.

    :param raw: the raw message, as a string
    :return: a 4-tuple ``(msg, parts, chash, multi)``: the parsed message,
             the result of ``walk.get_parts`` for it, the value of
             ``walk.get_hash(raw)``, and whether the message is multipart
    :rtype: tuple
    """
    parsed = message_from_string(raw)
    # Tuple items are evaluated left to right, matching the original order.
    return (
        parsed,
        walk.get_parts(parsed),
        walk.get_hash(raw),
        parsed.is_multipart(),
    )