def _parse_mimepart(self, mimepart, mid, index, namespace_id):
    """Build the Block/Part pair for one MIME part and attach it to self.

    A part whose Content-Disposition is present but is neither 'inline'
    nor 'attachment' is logged, the message is flagged through
    ``_mark_error()`` and no Block or Part is created.

    Parameters
    ----------
    mimepart
        Parsed MIME part; its ``content_type``, ``content_disposition``,
        ``body`` and ``headers`` attributes are read.
    mid
        Message identifier; only forwarded to the logger and to
        ``_trim_filename``.
    index
        Stored as the new part's ``walk_index``.
    namespace_id
        Stored as the new block's ``namespace_id``.
    """
    from inbox.models.block import Block, Part

    disposition, disposition_params = mimepart.content_disposition

    # Guard clause: only a missing, 'inline' or 'attachment' disposition
    # is accepted.
    if not (disposition is None or
            disposition in ('inline', 'attachment')):
        log.error('Unknown Content-Disposition',
                  mid=mid,
                  bad_content_disposition=mimepart.content_disposition,
                  parsed_content_disposition=disposition)
        self._mark_error()
        return

    block = Block()
    block.namespace_id = namespace_id
    block.content_type = mimepart.content_type.value
    content_type_name = mimepart.content_type.params.get('name')
    block.filename = _trim_filename(content_type_name, mid)

    new_part = Part(block=block)
    new_part.walk_index = index

    # TODO maybe also trim other headers?
    if disposition is not None:
        new_part.content_disposition = disposition
    if disposition == 'attachment':
        # NOTE(review): the Content-Type name set above is replaced even
        # when the disposition carries no 'filename' parameter -- confirm
        # that _trim_filename(None, ...) is the intended result.
        disposition_filename = disposition_params.get('filename')
        new_part.block.filename = _trim_filename(disposition_filename, mid)

    if mimepart.body is None:
        payload = ''
    elif not new_part.block.content_type.startswith('text'):
        payload = mimepart.body
    else:
        encoded = mimepart.body.encode('utf-8', 'strict')
        # normalize mac/win/unix newlines
        payload = encoded.replace('\r\n', '\n').replace('\r', '\n')
    # Defensive: never store None as the block data.
    if payload is None:
        payload = ''

    new_part.content_id = mimepart.headers.get('Content-Id')
    block.data = payload

    # Link to the message only now, so that a MIME part which fails to
    # parse above does not leave an incomplete block/part behind.
    new_part.message = self
def _parse_mimepart(self, mimepart, mid, index, namespace_id):
    """Parse a single MIME part into a Block and Part linked to this message.

    A part whose Content-Disposition is present but is neither 'inline'
    nor 'attachment' is logged, the message is flagged through
    ``_mark_error()`` and nothing is created.

    Parameters
    ----------
    mimepart
        MIME part to convert; ``content_type``, ``content_disposition``,
        ``body`` and ``headers`` are read from it.
    mid
        Message identifier; only passed to the logger and to
        ``_trim_filename``.
    index
        Stored as the part's ``walk_index``.
    namespace_id
        Stored as the block's ``namespace_id``.
    """
    from inbox.models.block import Block, Part

    disposition, disposition_params = mimepart.content_disposition
    if (disposition is not None and
            disposition not in ['inline', 'attachment']):
        cd = mimepart.content_disposition
        log.error('Unknown Content-Disposition', mid=mid,
                  bad_content_disposition=cd,
                  parsed_content_disposition=disposition)
        self._mark_error()
        return

    block = Block()
    block.namespace_id = namespace_id
    block.content_type = mimepart.content_type.value
    block.filename = _trim_filename(
        mimepart.content_type.params.get('name'), mid)

    # The part is intentionally NOT given message=self yet: the body
    # handling below can still raise (e.g. in .encode()), and a part that
    # is already linked would then stay attached half-populated.
    new_part = Part(block=block)
    new_part.walk_index = index

    # TODO maybe also trim other headers?
    if disposition is not None:
        new_part.content_disposition = disposition
    if disposition == 'attachment':
        new_part.block.filename = _trim_filename(
            disposition_params.get('filename'), mid)

    if mimepart.body is None:
        data_to_write = ''
    elif new_part.block.content_type.startswith('text'):
        data_to_write = mimepart.body.encode('utf-8', 'strict')
        # normalize mac/win/unix newlines
        data_to_write = data_to_write.replace('\r\n', '\n'). \
            replace('\r', '\n')
    else:
        data_to_write = mimepart.body
    # Defensive: never store None as the block data.
    if data_to_write is None:
        data_to_write = ''

    new_part.content_id = mimepart.headers.get('Content-Id')
    block.data = data_to_write

    # Wait until the end so we don't create incomplete blocks/parts for
    # MIME parts which fail to parse.
    new_part.message = self
def _parse_mimepart(self, mimepart, mid, index, namespace_id):
    """Parse a single MIME part into a Block and Part linked to this message.

    A part whose Content-Disposition is present but is neither 'inline'
    nor 'attachment' is logged and skipped; no Block or Part is created
    for it.

    Parameters
    ----------
    mimepart
        MIME part to convert; ``content_type``, ``content_disposition``,
        ``body`` and ``headers`` are read from it.
    mid
        Message identifier; only passed to the logger and to
        ``_trim_filename``.
    index
        Stored as the part's ``walk_index``.
    namespace_id
        Stored as the block's ``namespace_id``.
    """
    from inbox.models.block import Block, Part

    # Validate the disposition *before* building anything: constructing
    # Part(..., message=self) first and returning afterwards would leave a
    # half-populated part pointing at this message.
    value, params = mimepart.content_disposition
    if value is not None and value not in ['inline', 'attachment']:
        cd = mimepart.content_disposition
        log.error('Unknown Content-Disposition', mid=mid,
                  bad_content_disposition=cd,
                  parsed_content_disposition=value)
        return

    block = Block()
    block.namespace_id = namespace_id
    block.content_type = mimepart.content_type.value
    block.filename = _trim_filename(
        mimepart.content_type.params.get('name'), mid)

    new_part = Part(block=block, message=self)
    new_part.walk_index = index

    # TODO maybe also trim other headers?
    if value is not None:
        new_part.content_disposition = value
    if value == 'attachment':
        new_part.block.filename = _trim_filename(
            params.get('filename'), mid)

    if mimepart.body is None:
        data_to_write = ''
    elif new_part.block.content_type.startswith('text'):
        data_to_write = mimepart.body.encode('utf-8', 'strict')
        # normalize mac/win/unix newlines
        data_to_write = data_to_write.replace('\r\n', '\n'). \
            replace('\r', '\n')
    else:
        data_to_write = mimepart.body
    # Defensive: never store None as the block data.
    if data_to_write is None:
        data_to_write = ''

    new_part.content_id = mimepart.headers.get('Content-Id')
    block.data = data_to_write
    self.parts.append(new_part)
def __init__(self, account=None, mid=None, folder_name=None,
             received_date=None, flags=None, body_string=None,
             *args, **kwargs):
    """Parse raw message data into metadata and MIME parts on this object.

    The new Part objects are attached through ``self.parts``; nothing is
    committed here.

    Threads are not computed here; you gotta do that separately.

    When none of account, mid, folder_name, flags and body_string is
    given (the drafts case) only the base constructor runs and no parsing
    happens.

    If parsing raises ``mime.DecodingError``, ``AttributeError`` or
    ``RuntimeError`` the failure is logged, ``self.mark_error()`` is
    called and the constructor returns with whatever was set so far.

    Parameters
    ----------
    account
        Owning account; ``account.id`` and ``account.namespace`` are read.
    mid : int
        The account backend-specific message identifier; it's only used
        for logging errors and passed to ``_trim_filename``.
    folder_name
        Folder name; only used in log output.
    received_date
        Used as ``self.received_date`` when truthy; otherwise the date is
        derived from the Date/Received headers via ``get_internaldate``.
    flags
        Required together with the other arguments; not otherwise read
        in this constructor.
    body_string : str
        The full message including headers (encoded). Must not be a
        ``unicode`` object.

    Raises
    ------
    ValueError
        If some, but not all, of the required arguments are provided.
    """
    _rqd = [account, mid, folder_name, flags, body_string]

    MailSyncBase.__init__(self, *args, **kwargs)

    # for drafts
    if not any(_rqd):
        return

    if any(v is None for v in _rqd):
        raise ValueError(
            "Required keyword arguments: account, mid, folder_name, "
            "flags, body_string")

    # stop trickle-down bugs
    assert account.namespace is not None
    assert not isinstance(body_string, unicode)

    try:
        parsed = mime.from_string(body_string)

        mime_version = parsed.headers.get('Mime-Version')
        # sometimes MIME-Version is "1.0 (1.0)", hence the .startswith()
        if mime_version is not None and not mime_version.startswith('1.0'):
            log.warning('Unexpected MIME-Version',
                        account_id=account.id, folder_name=folder_name,
                        mid=mid, mime_version=mime_version)

        self.data_sha256 = sha256(body_string).hexdigest()

        # clean_subject strips re:, fwd: etc.
        self.subject = parsed.clean_subject
        self.from_addr = parse_email_address_list(
            parsed.headers.get('From'))
        self.sender_addr = parse_email_address_list(
            parsed.headers.get('Sender'))
        self.reply_to = parse_email_address_list(
            parsed.headers.get('Reply-To'))

        self.to_addr = parse_email_address_list(
            parsed.headers.getall('To'))
        self.cc_addr = parse_email_address_list(
            parsed.headers.getall('Cc'))
        self.bcc_addr = parse_email_address_list(
            parsed.headers.getall('Bcc'))

        self.in_reply_to = parsed.headers.get('In-Reply-To')
        self.message_id_header = parsed.headers.get('Message-Id')

        self.received_date = received_date if received_date else \
            get_internaldate(parsed.headers.get('Date'),
                             parsed.headers.get('Received'))

        # Custom Inbox header
        self.inbox_uid = parsed.headers.get('X-INBOX-ID')

        # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
        self.references = parse_references(
            parsed.headers.get('References', ''),
            parsed.headers.get('In-Reply-To', ''))

        self.size = len(body_string)  # includes headers text

        from inbox.models.block import Part

        # Store all message headers as object with index 0
        headers_part = Part()
        headers_part.namespace_id = account.namespace.id
        headers_part.message = self
        headers_part.walk_index = 0
        headers_part.data = json.dumps(parsed.headers.items())
        self.parts.append(headers_part)

        # walk_index counts every walked part (skipped ones included),
        # starting at 1 because index 0 is the headers part above.
        walked = parsed.walk(with_self=parsed.content_type.is_singlepart())
        for i, mimepart in enumerate(walked, 1):
            if mimepart.content_type.is_multipart():
                log.warning('multipart sub-part found',
                            account_id=account.id, folder_name=folder_name,
                            mid=mid)
                continue  # TODO should we store relations?

            # Validate the disposition before creating the Part so a
            # rejected MIME part never leaves a half-built Part that
            # already points at this message.
            value, params = mimepart.content_disposition
            if value is not None and value not in ['inline', 'attachment']:
                log.error('Unknown Content-Disposition',
                          account_id=account.id, mid=mid,
                          folder_name=folder_name,
                          bad_content_disposition=(
                              mimepart.content_disposition),
                          parsed_content_disposition=value)
                continue

            new_part = Part()
            new_part.namespace_id = account.namespace.id
            new_part.message = self
            new_part.walk_index = i
            new_part.content_type = mimepart.content_type.value
            new_part.filename = _trim_filename(
                mimepart.content_type.params.get('name'),
                account.id, mid)
            # TODO maybe also trim other headers?

            if value is not None:
                new_part.content_disposition = value
            if value == 'attachment':
                new_part.filename = _trim_filename(
                    params.get('filename'), account.id, mid)

            if mimepart.body is None:
                data_to_write = ''
            elif new_part.content_type.startswith('text'):
                data_to_write = mimepart.body.encode('utf-8', 'strict')
                # normalize mac/win/unix newlines
                data_to_write = data_to_write \
                    .replace('\r\n', '\n').replace('\r', '\n')
            else:
                data_to_write = mimepart.body
            # Defensive: never store None as the part data.
            if data_to_write is None:
                data_to_write = ''

            new_part.content_id = mimepart.headers.get('Content-Id')
            new_part.data = data_to_write
            self.parts.append(new_part)

        self.calculate_sanitized_body()
    except mime.DecodingError:
        # Occasionally iconv will fail via maximum recursion depth. We
        # still keep the metadata and mark it as b0rked.
        _log_decode_error(account.id, folder_name, mid, body_string)
        log.error('Message parsing DecodeError', account_id=account.id,
                  folder_name=folder_name, err_filename=_get_errfilename(
                      account.id, folder_name, mid))
        self.mark_error()
        return
    except AttributeError:
        # For EAS messages that are missing Date + Received headers, due
        # to the processing we do in inbox.util.misc.get_internaldate()
        _log_decode_error(account.id, folder_name, mid, body_string)
        log.error('Message parsing AttributeError', account_id=account.id,
                  folder_name=folder_name, err_filename=_get_errfilename(
                      account.id, folder_name, mid))
        self.mark_error()
        return
    except RuntimeError:
        _log_decode_error(account.id, folder_name, mid, body_string)
        # Pass the context as keyword fields like the handlers above;
        # the message has no placeholder, so .format(err_filename=...)
        # silently dropped the error filename.
        log.error('Message parsing RuntimeError<iconv>',
                  account_id=account.id, folder_name=folder_name,
                  err_filename=_get_errfilename(
                      account.id, folder_name, mid))
        self.mark_error()
        return
def __init__(self, account=None, mid=None, folder_name=None,
             received_date=None, flags=None, body_string=None,
             *args, **kwargs):
    """Parse raw message data into metadata and MIME parts on this object.

    Use .create() instead to handle common errors!
    (Can't abort object creation in a constructor.)

    When none of the six message arguments is given (the drafts case)
    only the base constructor runs and no parsing happens. Otherwise the
    message is parsed, header-derived attributes are set, one Part per
    non-multipart MIME part is appended to ``self.parts`` and the base
    constructor is invoked last. Parsing exceptions are not caught here.

    Parameters
    ----------
    account
        Owning account; ``account.namespace.id`` is stored on each part.
    mid
        Message identifier; only used in the Content-Disposition error
        message.
    folder_name
        Folder name; only used in the Content-Disposition error message.
    received_date
        Stored unchanged as ``self.received_date``.
    flags
        Not read in this constructor beyond the "any argument given"
        check.
    body_string
        The raw message handed to ``mime.from_string``; also hashed and
        measured for ``data_sha256`` and ``size``.
    """
    _rqd = [account, mid, folder_name, received_date, flags, body_string]

    # for drafts - skip parsing
    if not any(_rqd):
        MailSyncBase.__init__(self, *args, **kwargs)
        return

    parsed = mime.from_string(body_string)

    mime_version = parsed.headers.get('Mime-Version')
    # NOTE: sometimes MIME-Version is set to "1.0 (1.0)", hence the
    # .startswith
    if mime_version is not None and not mime_version.startswith('1.0'):
        log.error('Unexpected MIME-Version: {0}'.format(mime_version))

    self.data_sha256 = sha256(body_string).hexdigest()

    # clean_subject strips re:, fwd: etc.
    self.subject = parsed.clean_subject
    self.from_addr = parse_email_address_list(
        parsed.headers.get('From'))
    self.sender_addr = parse_email_address_list(
        parsed.headers.get('Sender'))
    self.reply_to = parse_email_address_list(
        parsed.headers.get('Reply-To'))

    self.to_addr = parse_email_address_list(
        parsed.headers.getall('To'))
    self.cc_addr = parse_email_address_list(
        parsed.headers.getall('Cc'))
    self.bcc_addr = parse_email_address_list(
        parsed.headers.getall('Bcc'))

    self.in_reply_to = parsed.headers.get('In-Reply-To')
    self.message_id_header = parsed.headers.get('Message-Id')

    self.received_date = received_date

    # Optional mailing list headers
    self.mailing_list_headers = parse_ml_headers(parsed.headers)

    # Custom Inbox header
    self.inbox_uid = parsed.headers.get('X-INBOX-ID')

    # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
    self.references = parse_references(
        parsed.headers.get('References', ''),
        parsed.headers.get('In-Reply-To', ''))

    self.size = len(body_string)  # includes headers text

    from inbox.models.block import Part

    # Store all message headers as object with index 0
    headers_part = Part()
    headers_part.namespace_id = account.namespace.id
    headers_part.message = self
    headers_part.walk_index = 0
    headers_part.data = json.dumps(parsed.headers.items())
    self.parts.append(headers_part)

    # walk_index counts every walked part (skipped ones included),
    # starting at 1 because index 0 is the headers part above.
    walked = parsed.walk(with_self=parsed.content_type.is_singlepart())
    for i, mimepart in enumerate(walked, 1):
        if mimepart.content_type.is_multipart():
            log.warning("multipart sub-part found! on {}"
                        .format(self.g_msgid))
            continue  # TODO should we store relations?

        # Validate the disposition before creating the Part so a rejected
        # MIME part never leaves a half-built Part that already points at
        # this message.
        value, params = mimepart.content_disposition
        if value is not None and value not in ['inline', 'attachment']:
            # The template has four fields; the parsed value must be
            # passed as the fourth argument or .format() raises
            # IndexError instead of logging.
            errmsg = (
                " Unknown Content-Disposition on message {0} found in {1}."
                " Bad Content-Disposition was: '{2}'"
                " Parsed Content-Disposition was: '{3}'"
            ).format(mid, folder_name, mimepart.content_disposition, value)
            log.error(errmsg)
            continue

        new_part = Part()
        new_part.namespace_id = account.namespace.id
        new_part.message = self
        new_part.walk_index = i
        new_part.misc_keyval = mimepart.headers.items()  # everything
        new_part.content_type = mimepart.content_type.value
        new_part.filename = _trim_filename(
            mimepart.content_type.params.get('name'),
            log=log)
        # TODO maybe also trim other headers?

        if value is not None:
            new_part.content_disposition = value
        if value == 'attachment':
            new_part.filename = _trim_filename(
                params.get('filename'), log=log)

        if mimepart.body is None:
            data_to_write = ''
        elif new_part.content_type.startswith('text'):
            data_to_write = mimepart.body.encode('utf-8', 'strict')
            # normalize mac/win/unix newlines
            data_to_write = data_to_write \
                .replace('\r\n', '\n').replace('\r', '\n')
        else:
            data_to_write = mimepart.body
        # Defensive: never store None as the part data.
        if data_to_write is None:
            data_to_write = ''

        new_part.content_id = mimepart.headers.get('Content-Id')
        new_part.data = data_to_write
        self.parts.append(new_part)

    self.calculate_sanitized_body()
    MailSyncBase.__init__(self, *args, **kwargs)