def parse_attachment(part):
    """
        Describe a single message part as an attachment

        :param `Message` part: A `Message`
        :rtype: dict
        :returns: A dict holding the `content_type`, `content` and `filename` of the part
    """
    content_type = part.get_content_type()

    if content_type.lower() in ('message/rfc822', 'message/delivery-status'):
        # Embedded messages and delivery reports are kept in their flattened form
        content = str(part)
    else:
        content = part.get_payload(decode=True)

    filename = part.get_filename()
    if not filename:
        # No name was supplied: derive one from a digest of the content.
        # The payload may be None (multipart part) or text (flattened message),
        # while hashlib only accepts bytes.
        digest_input = content
        if digest_input is None:
            digest_input = b''
        elif not isinstance(digest_input, bytes):
            digest_input = digest_input.encode('utf-8', 'replace')
        filename = hashlib.sha1(digest_input).hexdigest()

        if content_type:
            extension = mimetypes.guess_extension(content_type)
            if extension:
                filename += extension

    return {
        'content_type': content_type,
        'content': content,
        'filename': utils.decode_every_charset_in_the_world(filename),
    }
def get_received_from_headers(headers):
    """
        Extract the cleaned 'from' part of the 'Received' headers

        :param `Message` headers: The SMTP headers of the email
        :rtype: list
        :returns: One cleaned string per 'Received' header that starts with 'from'
    """
    if not headers.get('Received'):
        return []

    received = []
    for raw_value in headers.get_all('Received'):
        if not re.search('^from', raw_value):
            continue

        decoded = utils.decode_every_charset_in_the_world(raw_value)
        # Keep only the first line of the header, up to the first ';'
        origin = decoded.splitlines()[0]
        origin = origin.split(";")[0]
        # Drop the keywords so that only dns and ip remain
        for noise in ("from ", "localhost", "by"):
            origin = origin.replace(noise, "")
        received.append(origin)

    return received
def get_subject_from_headers(headers):
    """
        Decode the 'Subject' header of an email

        :param `Message` headers: The SMTP headers of the email
        :rtype: str
        :returns: The decoded subject cut to 1023 characters, or an empty
                  string when the header is missing or cannot be parsed
    """
    if 'Subject' not in headers:
        return ''

    raw_subject = headers['Subject']
    if raw_subject is None:
        return ''

    try:
        fragments = decode_header(raw_subject)
    except HeaderParseError:
        return ''

    # Fragments without a usable charset are decoded as utf-8
    decoded = [
        utils.decode_every_charset_in_the_world(
            chunk,
            'utf-8' if charset in (None, 'unknown') else charset
        )
        for chunk, charset in fragments
    ]
    return ''.join(decoded)[:1023]
def get_body_and_attachments(self, raw):
    """
        Get the body of the mail and retrieve attachments

        :param str raw: The raw email
        :rtype: tuple
        :returns: The decoded body and a list of attachments
    """
    parsed = self._parser.parsestr(raw)
    attachments = []
    body_parts = []

    for message in parsed.walk():
        content_type = message.get_content_type().lower()

        # Multipart containers are skipped, their sub-parts are visited by walk().
        # Embedded messages (message/rfc822) are still processed as a whole.
        if message.is_multipart() and content_type != 'message/rfc822':
            continue

        content_disposition = message.get('Content-Disposition')
        if content_disposition:
            # Decode leniently: a header with invalid bytes must not abort
            # the parsing of the whole mail, only the disposition type matters
            if isinstance(content_disposition, bytes):
                content_disposition = content_disposition.decode('utf-8', 'replace')

            # Strip after the split so that 'attachment ; filename=...' is recognised
            disposition = content_disposition.split(';')[0].strip().lower()

            is_attachment = disposition == 'attachment'
            is_inline_file = (
                disposition == 'inline'
                and content_type not in ('text/html', 'text/plain')
            )
            if is_attachment or is_inline_file:
                attachments.append(parse_attachment(message))
                continue

        content = message.get_payload(decode=True)
        if not content:
            # No decodable payload: fall back to the flattened part
            content = message.as_string()

        if content:
            body_parts.append(
                utils.decode_every_charset_in_the_world(
                    content,
                    message.get_content_charset()
                )
            )

    return ''.join(body_parts), attachments