def get_email(num, conn):
    """Fetch message `num` over IMAP and return its decoded fields.

    Returns a dict with "From", "Subject", "Body" (text of the first
    text/plain part) and "File" (a list of UploadedFile attachment wrappers).
    """
    result = {}
    typ, content = conn.fetch(num, '(RFC822)')
    msg = BytesParser().parsebytes(content[0][1])
    sub = msg.get('Subject')
    from_ = msg.get("From")
    # Body details
    result["From"] = decode_str(from_, "From")
    result["Subject"] = decode_str(sub, "Subject")
    result["File"] = []
    for part in msg.walk():
        if part.get_content_type() == "text/plain":
            body = part.get_payload(decode=True)
            charsets = part.get_charsets()
            # Fall back to utf-8 when the part declares no charset —
            # decode(None) would raise TypeError.
            result["Body"] = body.decode(charsets[0] or "utf-8")
        fileName = part.get_filename()
        if fileName is not None:
            # Wrap each attachment in an UploadedFile so callers can save it.
            file_dict = {
                "name": decode_str(fileName, "File"),
                "attachment": part.get_payload(decode=True),
                "content_type": part.get_content_type(),
            }
            new_file = ContentFile(file_dict["attachment"])
            file_obj = UploadedFile(new_file, file_dict["name"],
                                    file_dict["content_type"],
                                    new_file.size, None, None)
            result["File"].append(file_obj)
    return result
def get_content(self, data, _path) -> str:
    ''' get email content parsed

    Parses the email file at `_path` and stores its plain-text body in
    data["Parsed"]. (Note: mutates `data`; the declared str return is
    kept for interface compatibility but nothing is returned.)
    '''
    with open(_path, 'rb') as file:
        msg = BytesParser(policy=policy.default).parse(file)
    # BUG fix: ('plain') is just the string 'plain'; the preference
    # list must be a tuple — ('plain',).
    data["Parsed"] = msg.get_body(preferencelist=('plain',)).get_content()
def __init__(self, mail_data, mysql_creds, threshold, sensitivity, account, logger, mail_id, spam_folder):
    """Parse one raw mail and prepare all spam-scoring state.

    mail_data: raw message bytes fed to BytesParser.
    mysql_creds: dict with mysql_username/password/database/host keys.
    threshold, sensitivity: scoring knobs stored for later checks.
    spam_folder: destination folder name for messages judged spam.
    """
    # NOTE(review): these two look like JS-style /.../s regex literals pasted
    # verbatim — in Python the leading '/' and trailing '/s' are matched
    # literally. Confirm against their call sites.
    self.JS_IMPORT_REGEX = r'/<script.*(?:src="(.*)").*>/s'
    self.JS_EXTRACT_REGEX = r'/<script.*>(.*?)<\/script>/s'
    self.URL_REGEX = "http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|[^\x00-\x7F]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
    self.parser = BytesParser()
    self.sensitivity = sensitivity
    self.threshold = threshold
    self.log = logger
    self.spam_folder = spam_folder
    self.mysql_db = mysql.connector.connect(
        user=mysql_creds["mysql_username"],
        password=mysql_creds["mysql_password"],
        database=mysql_creds["mysql_database"],
        host=mysql_creds["mysql_host"])
    self.account = account
    self.spam_points = 0          # accumulated spam score
    self.js_code = {}
    self.urls_in_document = []
    self.documents = {}
    self.mail_id = mail_id
    # The headers are defined as <key>:<to_remove_from key>
    # -1 is used to define the last header, after that comes the mail contents
    self.whitelisted = False
    self.blacklisted = False
    self.parsed_mail = self.parser.parsebytes(mail_data)
    self.header_data = dict(self.parsed_mail)
    self.message = ""
    # Populates self.message; must run before list checks and URL extraction.
    self.extract_message()
    self._spam = -1               # -1 = not yet classified
    self.check_whitelist()
    self.check_blacklisted()
    self.urls = re.findall(self.URL_REGEX, self.message)
    for i in range(len(self.urls)):
        self.urls[i] = self.urls[i].strip()
def Receive(self, index):
    """Fetch messages from the POP3 server and print each one.

    Fetches the six most recent messages when index > 6, otherwise every
    message from `index` down to 1, newest first.
    """
    self.server = poplib.POP3_SSL(self.emailInfo["pop3_server"])
    # Authenticate.
    self.server.user(self.emailInfo["email"])
    self.server.pass_(self.emailInfo["pwd"])
    # Both original branches ran identical bodies; only the loop's lower
    # bound differed, so the duplication is collapsed into one loop.
    stop = index - 6 if index > 6 else 0
    for i in range(index, stop, -1):
        resp, lines, octets = self.server.retr(i)
        # `lines` holds each raw line of the message; join to rebuild it.
        msg_content = b'\r\n'.join(lines)
        msg = BytesParser().parsebytes(msg_content)
        self.print_info(msg)
        # Messages could be deleted by index here: self.server.dele(i)
    self.server.quit()
def preview_held_msg(self, datacomponent):
    """Build a text preview of a held Mailman message.

    `datacomponent` is the ':'-separated string produced by
    self.get_held_items(): <list-id>:HELD:<request-id>.
    """
    s = datacomponent.split(':')
    # we already know s[1]=="HELD"
    lista = s[0]
    rid = s[2]
    # Get the list
    mmlist = self.mmclient.get_list(lista)
    # From it, get held message by request_id
    msg = mmlist.get_held_message(rid)
    # Extract necessary details
    sender = msg._get('sender')
    subject = msg._get('subject')
    msgid = msg._get('message_id')
    reason = msg._get('reason')
    # MIMEparse msg._get('msg'):
    mp = BytesParser(policy=policy.default).parsebytes(
        msg._get('msg').encode('utf8'))
    # BUG fix: ('plain') is a bare string, not a tuple — use ('plain',).
    preview_text = mp.get_body(preferencelist=('plain',)).get_content()
    pre = """{t_full_subject}: {subject}
{t_msgid}: {msgid}
{t_reason}: {reason}

{preview_text}""".format(t_full_subject=_T('FULL SUBJECT'),
                         t_msgid=_T('MESSAGE ID'),
                         t_reason=_T('REASON'),
                         sender=sender,
                         subject=subject,
                         msgid=msgid,
                         reason=reason,
                         preview_text=preview_text)
    return (pre)
def fillUp(self):
    """Fetch up to the last 100 mails over IMAP and dump each body to Data/<uid>.html."""
    self.connect()
    # Search by UID so the ids stay stable across sessions.
    result, data = self.mail.uid('search', None, "ALL")
    id_list = data[0].split()
    for latest_email_uid in id_list[-100::1]:
        result, data = self.mail.uid('fetch', latest_email_uid, '(RFC822)')
        # Raw text of the whole email, headers and alternate payloads included.
        raw_email = data[0][1]
        email_message = BytesParser().parsebytes(raw_email)
        try:
            # Drill down through nested multiparts to the innermost part list.
            payload = email_message.get_payload(decode=False)
            while isinstance(payload, list) and isinstance(
                    payload[0].get_payload(decode=False), list):
                payload = payload[0].get_payload(decode=False)
            if isinstance(payload, list):
                # Prefer the last part unless it is a named attachment.
                if payload[-1].get_filename() is None:
                    html_message_juice = payload[-1].get_payload(decode=True)
                else:
                    html_message_juice = payload[0].get_payload(decode=True)
            else:
                html_message_juice = email_message.get_payload(decode=True)
            try:
                text = html_message_juice.decode()
            except Exception:
                # Some senders use a legacy Cyrillic codepage.
                text = html_message_juice.decode('windows-1251')
        except Exception:
            text = ("This email could not be processed see what happened "
                    "\n\nSubject: " + email_message['Subject'])
        # `with` closes the handle even on write errors (the old code leaked it).
        with open("Data/" + str(latest_email_uid) + ".html", "w",
                  encoding="utf8") as ssd:
            ssd.write(text)
def parse(self, bytesfile):
    """Parse an email from a file object into a dict of its main fields."""
    msgobj = Parser().parse(bytesfile)
    subject = self.parse_header_field(msgobj['Subject'])
    body_lines, html_lines, attachments = self.parse_body(msgobj.walk())
    # Delivered-to addresses count as recipients too.
    recipients = self.get_address_list(msgobj.get_all('To', []))
    recipients.extend(self.get_address_list(msgobj.get_all('X-Original-To', [])))
    ccs = self.get_address_list(msgobj.get_all('Cc', []))
    resent_tos = self.get_address_list(msgobj.get_all('resent-to', []))
    resent_ccs = self.get_address_list(msgobj.get_all('resent-cc', []))
    sender_name, sender_addr = parseaddr(self.get(msgobj.get('From')))
    if sender_addr:
        sender_addr = sender_addr.lower()
    return {
        'msgobj': msgobj,
        'message_id': msgobj.get('Message-Id'),
        'date': self.parse_date(self.get(msgobj.get("Date"))),
        'subject': subject,
        'body': '\n'.join(body_lines),
        'html': '\n'.join(html_lines),
        'from': (self.parse_header_field(sender_name), sender_addr),
        'to': recipients,
        'cc': ccs,
        'resent_to': resent_tos,
        'resent_cc': resent_ccs,
        'attachments': attachments
    }
def get_text_with_eml(self, index: int = 2) -> str:
    """Return the plain-text body of the index-th *.eml file in the cwd.

    `index` defaults to 2 to preserve the original hard-coded choice;
    raises IndexError when fewer files exist.
    """
    file_list = glob.glob('*.eml')  # returns list of files
    # select a specific email file from the list
    with open(file_list[index], 'rb') as fp:
        msg = BytesParser(policy=policy.default).parse(fp)
    # BUG fix: ('plain') is a bare string; the preference list must be a tuple.
    return msg.get_body(preferencelist=('plain',)).get_content()
def analyse(self, sample, samplename):
    """Parse the email file at `sample` and feed headers + body to build_report.

    Header metadata comes from eml_parser; the plain-text body from the
    stdlib email package. Any failure is routed to self.unexpectedError.
    """
    try:
        def json_serial(obj):
            # json.dumps helper: datetimes are not JSON-serializable by default.
            if isinstance(obj, datetime.datetime):
                serial = obj.isoformat()
                return serial
        with open(sample, 'rb') as fhdl:
            raw_email = fhdl.read()
        # Lets Grab All Dem Headers
        ep = eml_parser.EmlParser()
        parsed_eml = ep.decode_email_bytes(raw_email)
        # Round-trip through JSON to reduce everything to plain dict/str data.
        jsonEML = json.dumps(parsed_eml, default=json_serial)
        jsonEML = json.loads(jsonEML)
        # Grab the email body and pass into the report
        with open(sample, 'rb') as fp:
            msg = BytesParser(policy=policy.default).parse(fp)
        # BUG fix: ('plain') is a bare string; the preference list must be a tuple.
        text = msg.get_body(preferencelist=('plain',)).get_content()
        self.build_report(jsonEML, text)
    except Exception as e:
        self.unexpectedError(e)
class RibbitResponse:
    """
    A response to a RibbitRequest.
    The request that created that response is available on the .request attribute.
    """

    def __init__(
        self, request: RibbitRequest, data: bytes, *, verify: bool = True
    ) -> None:
        self.request = request
        self.data = data
        # NOTE(review): naive UTC timestamp (datetime.utcnow()).
        self.date = datetime.utcnow()
        # The whole response body is a MIME message.
        self.message = BytesParser().parsebytes(data)  # type: ignore # (typeshed#2502)
        # The MIME epilogue carries the checksum of the rest of the message.
        self.checksum = parse_checksum(self.message.epilogue)

        # The bytes of everything except the checksum (the epilogue)
        # The checksum is of those bytes
        self.content_bytes = data[:-len(self.message.epilogue)]
        if verify:
            content_checksum = sha256(self.content_bytes).hexdigest()
            if self.checksum != content_checksum:
                raise IntegrityVerificationError("ribbit response", content_checksum, self.checksum)

        # Payload 0 is the content proper; payload 1 is its signature.
        self.content = self.message.get_payload(0).get_payload()
        self.signature = self.message.get_payload(1).get_payload()
def process_email(raw_email):
    """Find the plus-address token in To: and store the decoded reply message."""
    msg = BytesParser(policy=policy.default).parsebytes(raw_email)
    body = msg.get_body(preferencelist=['plain'])
    content = body.get_payload(decode=True)
    # Fall back to charset sniffing when the header declares none.
    charset = body.get_content_charset() or chardet.detect(content)['encoding']
    content = content.decode(charset)
    # user+<80-char-token>@domain
    token_re = re.compile('^[^+@]+\+(?P<token>[a-zA-Z0-9]{80})@[^@]+$')
    m = None
    for addr in msg.get('To', '').split(','):
        m = token_re.match(addr.strip())
        if m:
            break
    if not m:
        raise NoTokenFoundException
    token = m.group('token')
    try:
        in_reply_to, author = process_new_token(token)
    except InvalidTokenException:
        in_reply_to, author = process_old_token(token)
    Message.objects.create(thread=in_reply_to.thread,
                           in_reply_to=in_reply_to,
                           author=author,
                           subject=msg.get('Subject', ''),
                           content=content)
def _encode_parts(self, header_data, msg_data, encoder):
    """Encodes any MIME part in the current message that is 8-bit.

    :type header_data: :py:obj:`bytes`
    :type msg_data: :py:obj:`bytes`
    """
    self.headers = None
    self.message = None
    raw = header_data + msg_data
    # Python 3 parses bytes; Python 2 parses the raw string directly.
    if six.PY3:
        msg = BytesParser().parsebytes(raw)
    else:
        msg = Parser().parsestr(raw)
    for part in msg.walk():
        if part.is_multipart():
            continue
        payload = part.get_payload()
        try:
            payload.encode('ascii')
        except UnicodeError:
            # Non-ASCII payload: drop the stale CTE header and re-encode.
            del part['Content-Transfer-Encoding']
            encoder(part)
    self.parse_msg(msg)
def make_person_schema(mailFile, outputDir, person_db):
    """Record the sender and ourselves in person_db, then link To/Cc people."""
    msg = BytesParser().parse(mailFile)
    # Record the sender.
    sender_name, sender_addr = get_info_from_mail_field(msg['from'])
    update_db(person_db, Person(sender_name, sender_addr))
    # Identify ourselves from the delivery header.
    my_name, my_email = get_info_from_mail_field(msg['Delivered-To'])
    me = Person(my_name, my_email)
    # Collect our alternate addresses from forwarding/resend headers.
    for field_name in ('X-Original-To', 'Resent-From'):
        _, extra_addr = get_info_from_mail_field(msg[field_name])
        if extra_addr:
            me.addEmail(extra_addr)
    update_db(person_db, me)
    # Find cc and to relation (excluding ourself).
    link_people(person_db, me, msg.get_all('to', []))
    link_people(person_db, me, msg.get_all('cc', []))
def fetch_and_parse(uids):
    ''' fetches and parses up to "commit_limit" new emails

    Returns a list of dicts with Date, the main headers, and the
    plain/html bodies (None when absent).
    '''
    from email.utils import parsedate_to_datetime
    result = list()
    for uid in uids:
        email_dict = dict()
        reply, email_data = imap_server.uid('fetch', uid, '(RFC822)')
        if reply != 'OK':
            continue
        raw_email = email_data[0][1]
        # Renamed from `email` so the stdlib module is not shadowed.
        msg = BytesParser(policy=default).parsebytes(raw_email)
        # BUG fix: the old fixed strptime format
        # ('%a, %d %b %Y %H:%M:%S %z') rejected many real-world Date
        # headers (missing weekday, "(UTC)" comments, two-digit years);
        # parsedate_to_datetime handles RFC 2822 dates robustly.
        email_dict['Date'] = parsedate_to_datetime(msg['Date'])
        for header in ['From', 'To', 'Delivered-To', 'Message-ID', 'Subject']:
            email_dict[header] = msg[header]
        email_dict['plain'] = None
        email_dict['html'] = None
        for part in msg.walk():
            if part.get_content_type() == 'text/html':
                email_dict['html'] = part.get_body().get_content()
            elif part.get_content_type() == 'text/plain':
                email_dict['plain'] = part.get_body().get_content()
        result.append(email_dict)
    return result
def __init__(self, data):
    """
    Cribbed heavily from
    https://www.ianlewis.org/en/parsing-email-attachments-python
    """
    Loggable.__init__(self)
    self.raw = data            # original message bytes
    self.attachments = []
    self.recipients = []
    message = BytesParser(policy=policy.default).parsebytes(self.raw)
    # Content hash, usable for de-duplication.
    self.hash = hashlib.sha512(data).hexdigest()
    self.sender = parseaddr(str(message["From"]))[1].lower()
    # Subject may be folded over several lines; collapse it.
    self.subject = str(message["Subject"]).replace("\r\n", "")

    # Prefer plain text and strip everything south of the signature. Note
    # that I'm not sure what will happen here if you send an HTML-only
    # email.
    # The regex removes a trailing "-- " signature block; the leading
    # [1:] drops the first paragraph of the stringified body.
    self.body = "\n\n".join(
        re.sub(r"\r?\n\r?\n-- \r?\n.*", "", str(
            message.get_body(
                preferencelist=('plain', 'related', 'html')
            )
        ), flags=re.DOTALL).split("\n\n")[1:]
    )

    self._set_recipients(message)
    self._set_time(message)
    self._set_attachments(message)
    self.logger.info('Consuming email: "{}"'.format(self.subject))
def get_content(num):
    """Fetch message `num`, decode it as GBK and route it to a transfer parser."""
    print(num)
    # `type` shadows the builtin here (kept as-is: doc-only change).
    type, data = raw_conn.fetch(num, '(RFC822)')
    # NOTE(review): indexes with the global `count`, not the `num`
    # parameter — confirm this is intended.
    email_date = get_date(email_list[int(count)])
    try:
        msg = BytesParser().parsebytes(data[0][1])
        for part in msg.walk():
            if not part.is_multipart():
                charset = part.get_charset()
                contenttype = part.get_content_type()
                content = part.get_payload(decode=True)
                # Hard-coded GBK decode; raises UnicodeDecodeError for
                # other charsets (caught below).
                content = content.decode('GBK')
                # time_formate() apparently selects the statement version.
                temp = time_formate(email_date)
                print(temp)
                if temp == '1':
                    print(temp)
                    get_transfer_v1(content)
                elif temp == '2':
                    print(temp)
                    get_transfer_v2(content)
                # #print (content)
    except TypeError:
        # Empty/undecodable payload.
        print('empty-email')
    except UnicodeDecodeError:
        print('hahah')
def get_mail_content(self, file_name):
    """Return the plain-text body of the email file at `file_name`."""
    # msg = email.message_from_file(open('sample.eml'))
    with open(file_name, 'rb') as fp:
        msg = BytesParser(policy=policy.default).parse(fp)
    # The `with` block closes the file; the old explicit fp.close() was
    # redundant. BUG fix: ('plain') is a bare string, not a tuple.
    text = msg.get_body(preferencelist=('plain',)).get_content()
    return text
def parse_body(body):
    """
    Parse the body from the email and extract the required fields.
    Need to extract sender email, subject of the email, the receive date,
    and body of the email. 'text' is '' when no text/plain part exists.
    """
    msg = BytesParser(policy=policy.SMTP).parsebytes(body)
    print("This is the message: ", msg.keys())
    print("From : ", msg['From'])
    print("Date: ", msg['Date'])
    print("To: ", msg['To'])
    print("Subject : ", msg['Subject'])
    plain = ''
    try:
        part = msg.get_body(preferencelist=('plain',))
        # BUG fix: get_body() returns None when there is no text/plain
        # part; the old code then crashed inside the try and returned
        # None as 'text' (its `'' if plain == None` repair was
        # unreachable after the exception).
        if part is not None:
            plain = ''.join(part.get_content().splitlines(keepends=True))
    except Exception:
        print('Incoming message does not have an plain text part - skipping this part.')
        plain = ''
    return {
        'from': msg['From'],
        'to': msg['To'],
        'subject': msg['Subject'],
        'date': msg['Date'],
        'text': plain
    }
def processEmail(emailBytes):
    """Extract (subject, cleaned text) from raw email bytes."""
    try:
        msg = BytesParser(policy=policy.default).parse(io.BytesIO(emailBytes))
        text = msg.get_body(preferencelist=('plain')).get_content()
        # NOTE(review): the next line clobbers the parsed body, so the
        # function always works on the raw decoded bytes (headers
        # included) — confirm this is intended.
        text = emailBytes.decode()
    except Exception as e:
        text = emailBytes.decode()
    lines = text.split('\n')
    # Split off a leading "Subject:" line when present.
    if 'Subject:' in lines[0]:
        subject = lines[0][8:]
    else:
        subject = ''
    if subject != '':
        text = ' '.join(lines)
    else:
        text = ' '.join(lines[1:])
    # print(f'Pre-formatted text: {text}')
    text = re.sub(r'https?://\S+', '', text, flags=re.MULTILINE)  # remove links
    text = re.sub(r' +|\t+|\\n', ' ', text)  # remove unnecessary spaces
    text = re.sub(r'\s([,?.!"](?:\s|$))', r'\1', text)  # remove spaces before punctuation
    # print(f'Text: {text}')
    # Check if text is empty before forwarding
    return subject, text
def query_S3(bucket, objkey):
    """Fetch the S3 object `objkey` from `bucket` and return its plain-text email body."""
    s3 = boto3.resource('s3')
    # PERF fix: fetch the object directly instead of listing and
    # comparing every key in the bucket (O(n) scan per call).
    body = s3.Object(bucket, objkey).get()['Body'].read()
    msg = BytesParser(policy=policy.SMTP).parsebytes(body)
    # get the plain text version of the email
    plain = ''
    try:
        # BUG fix: ('plain') is a bare string; the preference list must
        # be a tuple, and get_body() may return None.
        part = msg.get_body(preferencelist=('plain',))
        if part is not None:
            plain = ''.join(part.get_content().splitlines(keepends=True))
    except Exception:
        print(
            'Incoming message does not have an plain text part - skipping this part.'
        )
    return plain
def _get_email_content(uid, data):
    """Split raw email bytes into text, html and attachments.

    Returns a dict: 'text' (str or None), 'html' (value produced by
    _read_html, or None), 'attachments' (list from _read_attachment).
    When only an HTML part exists, its saved file is read back as the
    text fallback.
    """
    content = dict(text=None, html=None, attachments=[])
    email = BytesParser(policy=policy.default).parsebytes(data)
    for part in email.walk():
        if part.is_multipart():
            continue
        if part.is_attachment():
            content['attachments'].append(_read_attachment(part, uid))
        elif part.get_content_type() == 'text/plain':
            content['text'] = _read_text(part)
        elif part.get_content_type() == 'text/html':
            content['html'] = _read_html(part, uid)
    if content['html'] and not content['text']:
        # RESOURCE fix: `with` closes the handle even if read() raises
        # (the old open/read/close leaked it on error).
        with open(content['html'], 'r') as tmp:
            content['text'] = tmp.read()
    return content
def _login_btn_clicked(self):
    """Log into Gmail over POP3 with the entered credentials and open a window per message."""
    # print("Clicked")
    username = self.input_User.get()
    password = self.input_Pass.get()
    print(username, password)
    # Connect to Gmail's POP3 service.
    M = poplib.POP3_SSL('pop.gmail.com')
    M.user(username)
    M.pass_(password)
    # Get the number of messages.
    numero = len(M.list()[1])
    # Fetch the messages.
    # NOTE(review): the global `bytes` shadows the builtin.
    global response, headerLines, bytes
    for i in range(numero):
        # Retrieve message i (POP3 message numbers are 1-based).
        response, headerLines, bytes = M.retr(i + 1)
        # Join the raw lines into a single byte string.
        mensaje = b'\n'.join(headerLines)
        # Parse the message.
        p = BytesParser()
        email = p.parsebytes(mensaje)
        # Open a new window showing the parsed email.
        self.new_window(email)
def _get_content(self):
    """Parse the cached raw bytes into a Message object.

    `self.content` is provided by __getattr__ through the cache var
    self._content.
    """
    return BytesParser().parse(BytesIO(self.content))
def process_email(raw_email):
    """Match the plus-address token in To:, verify its key and store the message.

    Raises NoTokenFoundException, InvalidTokenException or
    InvalidKeyException on the corresponding failure.
    """
    msg = BytesParser(policy=policy.default).parsebytes(raw_email)
    body = msg.get_body(preferencelist=['plain'])
    content = body.get_payload(decode=True)
    charset = body.get_content_charset()
    if not charset:
        # Header declared no charset; sniff it from the bytes.
        charset = chardet.detect(content)['encoding']
    content = content.decode(charset)
    # user+<80-char-token>@domain
    regex = re.compile('^[^+@]+\+(?P<token>[a-zA-Z0-9]{80})@[^@]+$')
    m = None
    for addr in msg.get('To', '').split(','):
        m = regex.match(addr.strip())
        if m:
            break
    if not m:
        raise NoTokenFoundException
    token = m.group('token')
    key = token[64:]
    try:
        thread = MessageThread.objects.get(token=token[:32])
        sender = MessageCorrespondent.objects.get(token=token[32:64])
    # BUG fix: django.db.models has no DoesNotExist attribute — the
    # exception class lives on each model — so the old
    # `except models.DoesNotExist` raised AttributeError instead of
    # matching the lookup failure.
    except (MessageThread.DoesNotExist, MessageCorrespondent.DoesNotExist):
        raise InvalidTokenException
    if key != hexdigest_sha256(settings.SECRET_KEY, thread.token,
                               sender.token)[:16]:
        raise InvalidKeyException
    Message.objects.create(thread=thread, from_email=sender.email,
                           content=content)
def decode_email(msg_str, pos, key_map):
    """Process a whole raw email and build a dict record of its parts.

    Header fields come from parse_parts()/key_map; the body is the
    accumulated text/plain parts when any exist, otherwise the
    accumulated text/html parts. Attachment filenames are collected
    under 'Attachments'. (`pos` is unused but kept for compatibility.)
    """
    filenames = None
    message = BytesParser().parsebytes(msg_str)
    # get header: add header parts specified in key_map
    parts = parse_parts(message, key_map)
    parts['Size'] = len(msg_str)
    plain_body = ''
    html_body = ''
    for part in message.walk():
        plain_body += decode_part(part, 'text/plain')
        if plain_body:
            # Plain text wins: discard any HTML accumulated so far.
            html_body = ""
        else:
            html_body += decode_part(part, 'text/html')
        fn = part.get_filename()
        if fn:
            # IDIOM fix: `is None` instead of `== None`.
            if filenames is None:
                filenames = []
            filenames.append(fn)
    if filenames:
        parts['Attachments'] = filenames
    if plain_body:
        parts['text/plain'] = plain_body
    elif html_body:
        parts['text/html'] = html_body
    return parts
def test_prepend_headerfields_encoded():
    # we cope with non-ascii encodings in raw strings
    raw = 'Subject: föö'.encode('utf-8')
    msg = BytesParser(policy=default_policy).parsebytes(raw)
    assert msg.get_all("Subject")[0] == "föö"
    combined = pgp.prepend_header_fields(msg, [("To", "foo"), ("From", "bar")])
    expected = [('To', 'foo'), ('From', 'bar'), ('Subject', 'föö')]
    assert combined.items() == expected
def parse1(request_text=None, file_path=None):
    """Parse raw HTTP request bytes (or a file containing them) into a header dict.

    `file_path` takes precedence over `request_text`. Raises ValueError
    when neither is supplied (previously this crashed with an opaque
    AttributeError on None).
    """
    if file_path:
        request_text = ParseReqHeader().get_request_text_by_file(file_path)
    if request_text is None:
        raise ValueError('either request_text or file_path must be provided')
    # Split off the request line; the rest is an RFC822-style header block.
    request_line, headers_alone = request_text.split(b'\r\n', 1)
    headers = BytesParser().parsebytes(headers_alone)
    return {k: v for k, v in headers.items()}
def test_prepend_headerfields_as_header_objs():
    # we cope with email.header.Header instances as headerfields
    msg = BytesParser(policy=compat32).parsebytes('Subject: föö'.encode('utf-8'))
    subject = msg.get_all("Subject")[0]
    assert not isinstance(subject, str)
    result = pgp.prepend_header_fields(msg, [("To", "foo"), ("From", "bar")])
    expected = [('To', 'foo'), ('From', 'bar'),
                ('Subject', '=?unknown-8bit?b?ZsO2w7Y=?=')]
    assert result.items() == expected
def load_email_messages(self, message_set):
    """
    load_email_messages yields an EmailMessage for each email defined
    in message_set
    """
    parser = BytesParser(policy=email.policy.default)
    for raw_bytes in self.load_raw_emails(message_set):
        yield parser.parsebytes(raw_bytes)
def read_em():
    """Parse dict/em.txt and print its main headers and body."""
    with open("dict/em.txt", 'rb') as fp:
        content = BytesParser(policy=default).parse(fp, headersonly=False)
    print('To: {}'.format(content['to']))
    print('From: {}'.format(content['from']))
    print('Subject: {}'.format(content['subject']))
    print('Recipient username: {}'.format(content['to'].addresses[0].username))
    print('Sender name: {}'.format(content['from'].addresses[0].display_name))
    # Prefer the richest body representation available.
    body = content.get_body(preferencelist=('related', 'html', 'plain'))
    print('Body: {}'.format(body.get_content()))
def get_mail_body(file_path):
    """Return the plain-text body of the email file at `file_path`.

    Exits the process when the file is unreadable (kept from the original
    design), but now with a non-zero status so failure is visible to the
    shell instead of the old exit(0).
    """
    try:
        with open(file_path, "rb") as fp:
            msg = BytesParser(policy=policy.default).parse(fp)
    except IOError as error:
        print(error)
        exit(1)
    # BUG fix: ('plain') is a bare string; the preference list must be a tuple.
    body = msg.get_body(preferencelist=('plain',)).get_content()
    return body
def fillUp(modeladmin, request, queryset):
    """Admin action: import up to the last 100 mails of each account as Blog posts."""
    for obj in queryset:
        obj.connect()
        # Search by UID so the ids stay stable across sessions.
        result, data = obj.mail.uid('search', None, "ALL")
        id_list = data[0].split()
        for latest_email_uid in id_list[-100::1]:
            result, data = obj.mail.uid('fetch', latest_email_uid, '(RFC822)')
            # Raw text of the whole email, headers and alternate payloads included.
            raw_email = data[0][1]
            email_message = BytesParser().parsebytes(raw_email)
            try:
                # Drill down through nested multiparts to the innermost part list.
                payload = email_message.get_payload(decode=False)
                while isinstance(payload, list) and isinstance(
                        payload[0].get_payload(decode=False), list):
                    payload = payload[0].get_payload(decode=False)
                if isinstance(payload, list):
                    # Prefer the last part unless it is a named attachment.
                    if payload[-1].get_filename() is None:
                        html_message_juice = payload[-1].get_payload(decode=True)
                    else:
                        html_message_juice = payload[0].get_payload(decode=True)
                else:
                    html_message_juice = email_message.get_payload(decode=True)
                try:
                    body_text = html_message_juice.decode()
                except Exception:
                    # Some senders use a legacy Cyrillic codepage.
                    body_text = html_message_juice.decode('windows-1251')
                Blog(title=email_message['Subject'], body=body_text).save()
            except Exception:
                # Record the failure as a Blog entry rather than aborting the import.
                Blog(title=email_message['Subject'],
                     body="This email could not be processed see what happened "
                          "\n\nSubject: " + email_message['Subject']).save()
            obj.setData(repr(latest_email_uid))
def get_email_headers(message_bytes, headers=None):
    """Parse raw message bytes and return {header name: [parsed values]}.

    When `headers` is None, every header present in the message is returned.
    """
    with closing(BytesIO(message_bytes)) as stream:
        msgobj = Parser().parse(stream)
    wanted = dict(msgobj) if headers is None else headers
    return {
        name: [parse_header_field(value) for value in msgobj.get_all(name, [])]
        for name in wanted
    }
def get_content(self, raw):
    """Decode a base64url Gmail payload into a dict of its headers plus 'body'."""
    decoded = base64.urlsafe_b64decode(raw)
    message = EmailParser(policy=policy.default).parsebytes(decoded)
    plain_part = message.get_body(preferencelist=('plain',))
    email_dict = dict(message)
    # 'body' is the raw payload of the plain part, or None when absent.
    email_dict['body'] = plain_part.get_payload() if plain_part else None
    return email_dict
def __init__(self, data, verbosity=1):
    """
    Cribbed heavily from
    https://www.ianlewis.org/en/parsing-email-attachments-python
    """
    self.verbosity = verbosity
    self.subject = None
    self.time = None
    self.attachment = None
    message = BytesParser(policy=policy.default).parsebytes(data)
    # Subject may be folded over several lines; collapse it.
    self.subject = str(message["Subject"]).replace("\r\n", "")
    self.body = str(message.get_body())
    self.check_subject()
    self.check_body()
    self._set_time(message)
    Log.info(
        'Importing email: "{}"'.format(self.subject), Log.COMPONENT_MAIL)
    attachments = []
    for part in message.walk():
        content_disposition = part.get("Content-Disposition")
        if not content_disposition:
            continue
        dispositions = content_disposition.strip().split(";")
        # Only true attachments; inline parts are skipped.
        if not dispositions[0].lower() == "attachment":
            continue
        # NOTE(review): get_payload() without decode=True followed by
        # b64decode assumes the attachment is base64-encoded — confirm
        # behavior for other Content-Transfer-Encodings.
        file_data = part.get_payload()
        attachments.append(Attachment(
            b64decode(file_data), content_type=part.get_content_type()))
    # Exactly one attachment is required for automatic indexing.
    if len(attachments) == 0:
        raise InvalidMessageError(
            "There don't appear to be any attachments to this message")
    if len(attachments) > 1:
        raise InvalidMessageError(
            "There's more than one attachment to this message. It cannot "
            "be indexed automatically."
        )
    self.attachment = attachments[0]
def split_email(self, raw_email):
    """Separate an email's regular parts from its file attachments.

    Returns (kept_parts, attachments, parsed_message).
    """
    parsed_email = BytesParser().parsebytes(raw_email)
    to_keep = []
    attachments = []
    if not parsed_email.is_multipart():
        to_keep.append(parsed_email.get_payload())
        return to_keep, attachments, parsed_email
    for part in parsed_email.get_payload():
        raw_name = part.get_filename()
        if not raw_name:
            to_keep.append(part)
            continue
        # RFC 2047-decode the filename when it carries an encoding.
        decoded = decode_header(raw_name)
        value, enc = decoded[0]
        filename = value.decode(enc) if enc else value
        attachments.append(File(part.get_payload(decode=True), filename))
    return to_keep, attachments, parsed_email
def process_mailbox(self):
    """Download messages newer than the last stored UID into the history container."""
    rv, data = self.imap.uid('search', None, "ALL")
    if rv != 'OK':
        dbgprint("No messages found!")
        return
    # delete removed messages
    self.remove_deleted_msgs_from_history(data[0].split())
    self.post_progress(50)
    # get last history uid
    lastmessage = 0
    if self.histcontainer.get_nr_elements('email') > 0:
        lastmessage = self.histcontainer.get_last_element('email').get_uid()
    # lastmessage = 35
    dbgprint("last element uid: " + str(lastmessage))
    # loop over all messages and download new ones
    if (int(data[0].split()[-1]) - lastmessage) > 0:
        # Progress advances from 50 to 80 spread over the new messages.
        progressstep = 30 / (int(data[0].split()[-1]) - lastmessage)
        progressactual = 50
        for uid in data[0].split():
            if int(uid) <= lastmessage:
                continue  # already in history
            rv, data = self.imap.uid('fetch', uid, '(RFC822)')
            if rv != 'OK':
                dbgprint("ERROR getting message " + uid)
                continue
            # dbgprint("New message UID: "+ str(int(uid)))
            # Parsed twice: a compat message for attachment handling and a
            # policy.default message for get_body().
            msg = email.message_from_bytes(data[0][1])
            msg2 = BytesParser(policy=policy.default).parsebytes(data[0][1])
            # dbgprint("BODY:",msg2.get_body(),"------------------")
            body = msg2.get_body(preferencelist=('plain', 'html'))
            el = self.histcontainer.make_element_from_message(int(uid), msg)
            self.process_body(str(body), el)
            self.process_attachments(msg, el)
            self.histcontainer.add_element(el)
            self.mark_msg_as_read(int(uid))
            progressactual += progressstep
            self.post_progress(progressactual)
def parse(self, bytesfile):
    """Parse an email stream into a ParsedEmail, deriving text from HTML if needed."""
    msgobj = Parser().parse(bytesfile)
    body_lines, html_lines, attachments = parse_email_body(msgobj)
    body = '\n'.join(body_lines).strip()
    html = '\n'.join(html_lines).strip()
    # Fall back to a text rendering of the HTML part when no plain body exists.
    if html and not body:
        body = convert_html_to_text(html)
    email_info = parse_main_headers(msgobj)
    email_info.update({'body': body, 'html': html, 'attachments': attachments})
    return ParsedEmail(msgobj, **email_info)
def parse_attachment(self, message_part):
    """Build a BytesIO attachment object from a MIME part, or return None.

    Returns a BytesIO carrying extra attributes (content_type, size, name,
    create_date, mod_date, read_date) when the part is a true attachment or
    an inline part with a filename; None otherwise.
    """
    content_disposition = message_part.get("Content-Disposition", None)
    if content_disposition:
        dispo_type, dispo_dict = self.parse_dispositions(content_disposition)
        if dispo_type == "attachment" or (dispo_type == 'inline'
                and 'filename' in dispo_dict):
            content_type = message_part.get("Content-Type", None)
            file_data = message_part.get_payload(decode=True)
            if file_data is None:
                # Undecodable (e.g. multipart) payload: serialize the sub-parts.
                payloads = message_part.get_payload()
                file_data = '\n\n'.join([p.as_string() for p in payloads])
            try:
                file_data = file_data.encode('utf-8')
            except:
                # Already bytes (encode only exists on str); leave as-is.
                pass
            attachment = BytesIO(file_data)
            attachment.content_type = message_part.get_content_type()
            attachment.size = len(file_data)
            attachment.name = None
            attachment.create_date = None
            attachment.mod_date = None
            attachment.read_date = None
            if "filename" in dispo_dict:
                attachment.name = dispo_dict['filename']
            if content_type:
                # The Content-Type 'name' parameter overrides as the filename.
                _, content_dict = self.parse_dispositions(content_type)
                if 'name' in content_dict:
                    attachment.name = content_dict['name']
            if attachment.name is None and content_type == 'message/rfc822':
                # Attached email without a name: derive one from its Subject.
                p = Parser()
                msgobj = p.parse(BytesIO(attachment.getvalue()))
                subject = self.parse_header_field(msgobj['Subject'])
                if subject:
                    attachment.name = '%s.eml' % subject[:45]
            if "create-date" in dispo_dict:
                attachment.create_date = dispo_dict['create-date']  # TODO: datetime
            if "modification-date" in dispo_dict:
                attachment.mod_date = dispo_dict['modification-date']  # TODO: datetime
            if "read-date" in dispo_dict:
                attachment.read_date = dispo_dict['read-date']  # TODO: datetime
            return attachment
    return None
def getMailAttachment(connection, mailID, AttachmentNr):
    """AttachmentNr starting with 1

    Fetches mail `mailID` over IMAP and returns
    (attachment, attachmentName, date_of_mail, subject). attachment and
    attachmentName are None when the attachment is missing or the sender
    is not trusted.
    """
    result, data = connection.fetch(mailID, "(RFC822)")
    raw_email = data[0][1]
    p = BytesParser()
    msg = p.parsebytes(raw_email)
    sender = msg.get('From')
    subject = msg.get('Subject')
    date_of_mail = msg.get('Date')
    mail_as_list = msg.get_payload()
    try:
        attachment = mail_as_list[AttachmentNr]  # 0 is the message itself
        attachmentName = attachment.get_filename()
    # Narrowed from a bare except: missing index, non-subscriptable or
    # non-Message payloads.
    except (IndexError, TypeError, AttributeError):
        attachment = None
        attachmentName = None
        print('Anhang Nr. ', AttachmentNr, ' of mail ', mailID, ' does not exist.')
    # BUG fix: `sender` is None when the From header is missing; the old
    # bare membership test then raised TypeError.
    if sender is None or 'Remotefox' not in sender:
        attachment = None
        attachmentName = None
    return attachment, attachmentName, date_of_mail, subject
def parse_attachment(self, message_part):
    """Build a BytesIO attachment object from a MIME part, or return None.

    Returns a BytesIO carrying extra attributes (content_type, size, name,
    create_date, mod_date, read_date) when the part is a true attachment or
    an inline part with a filename; None otherwise.
    """
    content_disposition = message_part.get("Content-Disposition", None)
    if content_disposition:
        dispo_type, dispo_dict = self.parse_dispositions(content_disposition)
        if dispo_type == "attachment" or (dispo_type == "inline"
                and "filename" in dispo_dict):
            content_type = message_part.get("Content-Type", None)
            file_data = message_part.get_payload(decode=True)
            if file_data is None:
                # Undecodable (e.g. multipart) payload: serialize the sub-parts.
                payloads = message_part.get_payload()
                file_data = "\n\n".join([p.as_string() for p in payloads]).encode("utf-8")
            attachment = BytesIO(file_data)
            attachment.content_type = message_part.get_content_type()
            attachment.size = len(file_data)
            attachment.name = None
            attachment.create_date = None
            attachment.mod_date = None
            attachment.read_date = None
            if "filename" in dispo_dict:
                attachment.name = dispo_dict["filename"]
            if content_type:
                # The Content-Type 'name' parameter overrides as the filename.
                _, content_dict = self.parse_dispositions(content_type)
                if "name" in content_dict:
                    attachment.name = content_dict["name"]
            if attachment.name is None and content_type == "message/rfc822":
                # Attached email without a name: derive one from its Subject.
                p = Parser()
                msgobj = p.parse(BytesIO(attachment.getvalue()))
                subject = self.parse_header_field(msgobj["Subject"])
                if subject:
                    attachment.name = "%s.eml" % subject[:45]
            if "create-date" in dispo_dict:
                attachment.create_date = dispo_dict["create-date"]  # TODO: datetime
            if "modification-date" in dispo_dict:
                attachment.mod_date = dispo_dict["modification-date"]  # TODO: datetime
            if "read-date" in dispo_dict:
                attachment.read_date = dispo_dict["read-date"]  # TODO: datetime
            return attachment
    return None
def extractMetaData(self, obj):
    """Parse the header section of `obj` and return it in Swift metadata form."""
    parsed = BytesParser().parse(obj)
    header_map = dict(parsed.items())
    return self.convertMetaDataToSwiftFormat(header_map)
class ArchivesParser(object):
    """Parse a raw mailing-list message into structured fields.

    After parse() + analyze(), the instance carries: msgid, _from, to, cc,
    subject, date, bodytxt, attachments and parents (in-reply-to/references
    message ids). The class contains many workarounds for broken MUAs
    (charsets, date formats, header quoting), so statement order matters.
    """

    def __init__(self):
        # compat32 keeps the legacy (pre-3.3) header behaviour, which the
        # hand-rolled decoding below relies on.
        self.parser = BytesParser(policy=compat32)

    def parse(self, stream):
        """Read the entire raw message from *stream* and parse it."""
        self.rawtxt = stream.read()
        self.msg = self.parser.parse(io.BytesIO(self.rawtxt))

    def is_msgid(self, msgid):
        """Return True if this message's Message-ID equals *msgid*.

        NOTE(review): returns False on any decode error, and implicitly
        None (falsy) when the ids simply differ.
        """
        # Look for a specific messageid. This means we might parse it twice,
        # but so be it. Any exception means we know it's not this one...
        try:
            if self.clean_messageid(self.decode_mime_header(self.get_mandatory('Message-ID'))) == msgid:
                return True
        except Exception as e:
            return False

    def analyze(self, date_override=None):
        """Decode headers, body, attachments and thread parents.

        *date_override*, when given, replaces the Date header entirely.
        """
        self.msgid = self.clean_messageid(self.decode_mime_header(self.get_mandatory('Message-ID')))
        self._from = self.decode_mime_header(self.get_mandatory('From'), True)
        self.to = self.decode_mime_header(self.get_optional('To'), True)
        self.cc = self.decode_mime_header(self.get_optional('CC'), True)
        self.subject = self.decode_mime_header(self.get_optional('Subject'))
        if date_override:
            self.date = self.forgiving_date_decode(date_override)
        else:
            self.date = self.forgiving_date_decode(self.decode_mime_header(self.get_mandatory('Date')))

            # Accept times up to 4 hours in the future, for badly synced clocks
            maxdate = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(hours=4)
            if self.date > maxdate:
                # Date is in the future, we don't trust that. Instead, let's see if we can find
                # it in the raw text of the message.
                def _extract_date(d):
                    # Pull a parseable date out of one Received header line.
                    m = _re_received.match(d)
                    if m:
                        try:
                            return self.forgiving_date_decode(m.group(1).strip())
                        except IgnorableException:
                            pass

                # NOTE(review): if there are no Received headers, get_all()
                # returns None and this raises; likewise min() raises
                # ValueError when no candidate date is below maxdate —
                # presumably callers treat that as a parse failure. Confirm.
                lowdate = min((x for x in map(_extract_date, self.msg.get_all('Received')) if x and x < maxdate))
                if lowdate:
                    self.date = lowdate
                # Else we're going to go with what we found
        self.bodytxt = self.get_body()
        self.attachments = []
        self.get_attachments()
        if len(self.attachments) > 0:
            log.status("Found %s attachments" % len(self.attachments))

        # Build a list of the message ids we are interested in
        self.parents = []
        # The first one is in-reply-to, if it exists
        if self.get_optional('in-reply-to'):
            m = self.clean_messageid(self.decode_mime_header(self.get_optional('in-reply-to')), True)
            if m:
                self.parents.append(m)
        # Then we add all References values, in backwards order
        if self.get_optional('references'):
            cleaned_msgids = [self.clean_messageid(x, True) for x in reversed(self.decode_mime_header(self.get_optional('references')).split())]
            # Can't do this with a simple self.parents.extend() due to broken
            # mailers that add the same reference more than once. And we can't
            # use a set() to make it unique, because order is very important
            for m in cleaned_msgids:
                if m and m not in self.parents:
                    self.parents.append(m)

    def clean_charset(self, charset):
        """Map commonly-misdeclared charset names to ones Python knows."""
        lcharset = charset.lower()
        if lcharset == 'unknown-8bit' or lcharset == 'x-unknown' or lcharset == 'unknown':
            # Special case where we don't know... We'll assume
            # us-ascii and use replacements
            return 'us-ascii'
        if lcharset == '0' or lcharset == 'x-user-defined' or lcharset == '_autodetect_all' or lcharset == 'default_charset':
            # Seriously broken charset definitions, map to us-ascii
            # and throw away the rest with replacements
            return 'us-ascii'
        if lcharset == 'x-gbk':
            # Some MUAs set it to x-gbk, but there is a valid
            # declaratoin as gbk...
            return 'gbk'
        if lcharset == 'iso-8859-8-i':
            # -I is a special logical version, but should be the
            # same charset
            return 'iso-8859-8'
        if lcharset == 'windows-874':
            # This is an alias for iso-8859-11
            return 'iso-8859-11'
        if lcharset == 'iso-88-59-1' or lcharset == 'iso-8858-1':
            # Strange way of saying 8859....
            return 'iso-8859-1'
        if lcharset == 'iso885915':
            return 'iso-8859-15'
        if lcharset == 'iso-latin-2':
            return 'iso-8859-2'
        if lcharset == 'iso-850':
            # Strange spelling of cp850 (windows charset)
            return 'cp850'
        if lcharset == 'koi8r':
            return 'koi8-r'
        if lcharset == 'cp 1252':
            return 'cp1252'
        if lcharset == 'iso-8859-1,iso-8859-2' or lcharset == 'iso-8859-1:utf8:us-ascii':
            # Why did this show up more than once?!
            return 'iso-8859-1'
        if lcharset == 'x-windows-949':
            return 'ms949'
        if lcharset == 'pt_pt' or lcharset == 'de_latin' or lcharset == 'de':
            # This is a locale, and not a charset, but most likely it's this one
            return 'iso-8859-1'
        if lcharset == 'iso-8858-15':
            # How is this a *common* mistake?
            return 'iso-8859-15'
        if lcharset == 'macintosh':
            return 'mac_roman'
        if lcharset == 'cn-big5':
            return 'big5'
        if lcharset == 'x-unicode-2-0-utf-7':
            return 'utf-7'
        if lcharset == 'tscii':
            # No support for this charset :S Map it down to ascii
            # and throw away all the rest. sucks, but we have to
            return 'us-ascii'
        return charset

    def get_payload_as_unicode(self, msg):
        """Decode one MIME part's payload to str, using the part's declared
        charset (cleaned via clean_charset); lossy on errors."""
        try:
            b = msg.get_payload(decode=True)
        except AssertionError:
            # Badly encoded data can throw an exception here, where the python
            # libraries fail to handle it and enters a cannot-happen path.
            # In which case we just ignore it and hope for a better MIME part later.
            b = None
        if b:
            # Find out if there is a charset
            charset = None
            params = msg.get_params()
            if not params:
                # No content-type, so we assume us-ascii
                return str(b, 'us-ascii', errors='ignore')
            for k, v in params:
                if k.lower() == 'charset':
                    charset = v
                    break
            if charset:
                try:
                    return str(b, self.clean_charset(charset), errors='ignore')
                except LookupError as e:
                    raise IgnorableException("Failed to get unicode payload: %s" % e)
            else:
                # XXX: reasonable default?
                return str(b, errors='ignore')
        # Return None or empty string, depending on what we got back
        return b

    # Regular expression matching the PostgreSQL custom mail footer that
    # is appended to all emails.
    _re_footer = re.compile('(.*)--\s+\nSent via [^\s]+ mailing list \([^\)]+\)\nTo make changes to your subscription:\nhttp://www\.postgresql\.org/mailpref/[^\s]+\s*$', re.DOTALL)

    def get_body(self):
        """Return the cleaned body text: strips surrogate-pair junk, the
        mailing-list footer, and NUL bytes."""
        b = self._get_body()
        if b:
            # Python bug 9133, allows unicode surrogate pairs - which PostgreSQL will
            # later reject..
            # NOTE(review): str.find() returns -1 (truthy) when absent, so this
            # condition is almost always true; the replace() below is a no-op in
            # that case, so behaviour is still correct.
            if b.find('\udbff\n\udef8'):
                b = b.replace('\udbff\n\udef8', '')

            # Remove postgres specific mail footer - if it's there
            m = self._re_footer.match(b)
            if m:
                b = m.group(1)

            # Sometimes we end up with a trailing \0 when decoding long strings, so
            # replace it if it's there.
            # In fact, replace it everywhere, since it can also turn up in the middle
            # of a text when it's a really broken decoding.
            b = b.replace('\0', '')
        return b

    def _get_body(self):
        """Find the best renderable body: single part, first text/plain,
        then cleaned text/html as a last resort."""
        # This is where the magic happens - try to figure out what the body
        # of this message should render as.
        hasempty = False

        # First see if this is a single-part message that we can just
        # decode and go.
        b = self.get_payload_as_unicode(self.msg)
        if b:
            return b
        if b == '':
            # We found something, but it was empty. We'll keep looking as
            # there might be something better available, but make a note
            # that empty exists.
            hasempty = True

        # Ok, it's multipart. Find the first part that is text/plain,
        # and use that one. Do this recursively, since we may have something
        # like:
        # multipart/mixed:
        #   multipart/alternative:
        #     text/plain
        #     text/html
        #   application/octet-stream (attachment)
        b = self.recursive_first_plaintext(self.msg)
        if b:
            return b
        if b == '':
            hasempty = True

        # Couldn't find a plaintext. Look for the first HTML in that case.
        # Fallback, but what can we do at this point...
        b = self.recursive_first_plaintext(self.msg, True)
        if b:
            b = self.html_clean(b)
            if b:
                return b
        if b == '' or b is None:
            hasempty = True

        if hasempty:
            log.status('Found empty body in %s' % self.msgid)
            return ''
        raise IgnorableException("Don't know how to read the body from %s" % self.msgid)

    def recursive_first_plaintext(self, container, html_instead=False):
        """Depth-first search for the first non-attachment text/plain part
        (or text/html when *html_instead* is set)."""
        pl = container.get_payload()
        if isinstance(pl, str):
            # This was not a multipart, but it leaked... Give up!
            return None
        for p in pl:
            if p.get_params() is None:
                # MIME multipart/mixed, but no MIME type on the part
                log.status("Found multipart/mixed in message '%s', but no MIME type on part. Trying text/plain." % self.msgid)
                return self.get_payload_as_unicode(p)
            if p.get_params()[0][0].lower() == 'text/plain':
                # Don't include it if it looks like an attachment
                if 'Content-Disposition' in p and p['Content-Disposition'].startswith('attachment'):
                    continue
                return self.get_payload_as_unicode(p)
            if html_instead and p.get_params()[0][0].lower() == 'text/html':
                # Don't include it if it looks like an attachment
                if 'Content-Disposition' in p and p['Content-Disposition'].startswith('attachment'):
                    continue
                return self.get_payload_as_unicode(p)
            if p.is_multipart():
                b = self.recursive_first_plaintext(p, html_instead)
                if b or b == '':
                    return b
        # Yikes, nothing here! Hopefully we'll find something when
        # we continue looping at a higher level.
        return None

    def get_attachments(self):
        """Populate self.attachments with (name, content-type, payload)
        tuples found anywhere in the message tree."""
        self.attachments_found_first_plaintext = False
        self.recursive_get_attachments(self.msg)

    # Clean a filenames encoding and return it as a unicode string
    def _clean_filename_encoding(self, filename):
        # If this is a header-encoded filename, start by decoding that
        if filename.startswith('=?'):
            decoded, encoding = decode_header(filename)[0]
            return str(decoded, encoding, errors='ignore')

        # If it's already unicode, just return it
        if isinstance(filename, str):
            return filename

        # Anything that's not UTF8, we just get rid of. We can live with
        # filenames slightly mangled in this case.
        return str(filename, 'utf-8', errors='ignore')

    def _extract_filename(self, container):
        """Best-effort filename for an attachment part, or None."""
        # Try to get the filename for an attachment in the container.
        # If the standard library can figure one out, use that one.
        f = container.get_filename()
        if f:
            return self._clean_filename_encoding(f)

        # Failing that, some mailers set Content-Description to the
        # filename
        if 'Content-Description' in container:
            return self._clean_filename_encoding(container['Content-Description'])
        return None

    def recursive_get_attachments(self, container):
        """Walk the MIME tree collecting attachments into self.attachments.

        Uses self.attachments_found_first_plaintext to treat every
        text/plain part *after* the first one as an attachment.
        """
        # We start recursion in the "multipart" container if any
        if container.get_content_type() == 'multipart/mixed' or container.get_content_type() == 'multipart/signed':
            # Multipart - worth scanning into
            if not container.is_multipart():
                # Wow, this is broken. It's multipart/mixed, but doesn't
                # contain multiple parts.
                # Since we're just looking for attachments, let's just
                # ignore it...
                return
            for p in container.get_payload():
                if p.get_params() is None:
                    continue
                self.recursive_get_attachments(p)
        elif container.get_content_type() == 'multipart/alternative':
            # Alternative is not an attachment (we decide)
            # It's typilcally plantext + html
            self.attachments_found_first_plaintext = True
            return
        elif container.is_multipart():
            # Other kinds of multipart, such as multipart/signed...
            return
        else:
            # Not a multipart.
            # Exclude specific contenttypes
            if container.get_content_type() == 'application/pgp-signature':
                return
            if container.get_content_type() in ('application/pkcs7-signature', 'application/x-pkcs7-signature'):
                return
            # For now, accept anything not text/plain
            if container.get_content_type() != 'text/plain':
                try:
                    self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True)))
                except AssertionError:
                    # Badly encoded data can throw an exception here, where the python
                    # libraries fail to handle it and enters a cannot-happen path.
                    # In which case we just ignore this attachment.
                    return
                return
            # It's a text/plain, it might be worthwhile.
            # If it has a name, we consider it an attachments
            if not container.get_params():
                return
            for k, v in container.get_params():
                if k == 'name' and v != '':
                    # Yes, it has a name
                    try:
                        self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True)))
                    except AssertionError:
                        # Badly encoded data can throw an exception here, where the python
                        # libraries fail to handle it and enters a cannot-happen path.
                        # In which case we just ignore this attachment.
                        return
                    return
            # If it's content-disposition=attachment, we also want to save it
            if 'Content-Disposition' in container and container['Content-Disposition'].startswith('attachment'):
                try:
                    self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True)))
                except AssertionError:
                    # Badly encoded data can throw an exception here, where the python
                    # libraries fail to handle it and enters a cannot-happen path.
                    # In which case we just ignore this attachment.
                    return
                return
            # If we have already found one text/plain part, make all
            # further text/plain parts attachments
            if self.attachments_found_first_plaintext:
                # However, this will also *always* catch the MIME part added
                # by majordomo with the footer. So if that one is present,
                # we need to explicitly exclude it again.
                try:
                    b = container.get_payload(decode=True)
                except AssertionError:
                    # Badly encoded data can throw an exception here, where the python
                    # libraries fail to handle it and enters a cannot-happen path.
                    # In which case we just ignore this attachment.
                    return
                if isinstance(b, str) and not self._re_footer.match(b):
                    # We know there is no name for this one
                    self.attachments.append((None, container.get_content_type(), b))
                return

            # Ok, so this was a plaintext that we ignored. Set the flag
            # that we have now ignored one, so we'll make the next one
            # an attachment.
            self.attachments_found_first_plaintext = True
            # No name, and text/plain, so ignore it

    re_msgid = re.compile('^\s*<(.*)>\s*')

    def clean_messageid(self, messageid, ignorebroken=False):
        """Strip angle brackets and spaces from a Message-ID; raise (or
        return None when *ignorebroken*) if it doesn't look like one."""
        m = self.re_msgid.match(messageid)
        if not m:
            if ignorebroken:
                log.status("Could not parse messageid '%s', ignoring it" % messageid)
                return None
            raise IgnorableException("Could not parse message id '%s'" % messageid)
        # NOTE(review): groups(1) only sets a *default* for non-participating
        # groups; group 1 always participates here, so this is just group 1.
        return m.groups(1)[0].replace(' ', '')

    # _date_multi_re = re.compile(' \((\w+\s\w+(\s+\w+)*|)\)$')
    #  Now using [^\s] instead of \w, to work with japanese chars
    _date_multi_re = re.compile(' \(([^\s]+\s[^\s]+(\s+[^\s]+)*|)\)$')
    _date_multi_re2 = re.compile(' ([\+-]\d{4}) \([^)]+\)$')
    _date_multiminus_re = re.compile(' -(-\d+)$')
    _date_offsetnoplus_re = re.compile(' (\d{4})$')

    def forgiving_date_decode(self, d):
        """Parse a Date header value, first normalizing a long list of
        real-world broken timezone/offset spellings. Always returns an
        aware datetime (UTC assumed when no tz); raises IgnorableException
        on failure."""
        if d.strip() == '':
            raise IgnorableException("Failed to parse empty date")
        # Strange timezones requiring manual adjustments
        if d.endswith('-7700 (EST)'):
            d = d.replace('-7700 (EST)', 'EST')
        if d.endswith('+6700 (EST)'):
            d = d.replace('+6700 (EST)', 'EST')
        if d.endswith('+-4-30'):
            d = d.replace('+-4-30', '+0430')
        if d.endswith('+1.00'):
            d = d.replace('+1.00', '+0100')
        if d.endswith('+-100'):
            d = d.replace('+-100', '+0100')
        if d.endswith('+500'):
            d = d.replace('+500', '+0500')
        if d.endswith('-500'):
            d = d.replace('-500', '-0500')
        if d.endswith('-700'):
            d = d.replace('-700', '-0700')
        if d.endswith('-800'):
            d = d.replace('-800', '-0800')
        if d.endswith('+05-30'):
            d = d.replace('+05-30', '+0530')
        if d.endswith('+0-900'):
            d = d.replace('+0-900', '-0900')
        if d.endswith('Mexico/General'):
            d = d.replace('Mexico/General', 'CDT')
        if d.endswith('Pacific Daylight Time'):
            d = d.replace('Pacific Daylight Time', 'PDT')
        if d.endswith(' ZE2'):
            d = d.replace(' ZE2', ' +0200')
        if d.find('-Juin-') > 0:
            d = d.replace('-Juin-', '-Jun-')
        if d.find('-Juil-') > 0:
            d = d.replace('-Juil-', '-Jul-')
        # NOTE(review): find() returns -1 (truthy) when absent, so this branch
        # almost always runs; the replace() is a no-op then, so it is harmless.
        if d.find(' 0 (GMT)'):
            d = d.replace(' 0 (GMT)', ' +0000')

        if self._date_multiminus_re.search(d):
            d = self._date_multiminus_re.sub(' \\1', d)

        if self._date_offsetnoplus_re.search(d):
            d = self._date_offsetnoplus_re.sub('+\\1', d)

        # We have a number of dates in the format
        # "<full datespace> +0200 (MET DST)"
        # or similar. The problem coming from the space within the
        # parenthesis, or if the contents of the parenthesis is
        # completely empty
        if self._date_multi_re.search(d):
            d = self._date_multi_re.sub('', d)

        # If the spec is instead
        # "<full datespace> +0200 (...)"
        # of any kind, we can just remove what's in the (), because the
        # parser is just going to rely on the fixed offset anyway.
        if self._date_multi_re2.search(d):
            d = self._date_multi_re2.sub(' \\1', d)

        try:
            dp = dateutil.parser.parse(d, fuzzy=True)

            # Some offsets are >16 hours, which postgresql will not
            # (for good reasons) accept
            if dp.utcoffset() and abs(dp.utcoffset().days * (24 * 60 * 60) + dp.utcoffset().seconds) > 60 * 60 * 16 - 1:
                # Convert it to a UTC timestamp using Python. It will give
                # us the right time, but the wrong timezone. Should be
                # enough...
                dp = datetime.datetime(*dp.utctimetuple()[:6])
            if not dp.tzinfo:
                dp = dp.replace(tzinfo=datetime.timezone.utc)
            return dp
        except Exception as e:
            raise IgnorableException("Failed to parse date '%s': %s" % (d, e))

    def _maybe_decode(self, s, charset):
        """Decode *s* to str using the (cleaned) charset when it is bytes;
        strip surrounding spaces either way."""
        if isinstance(s, str):
            return s.strip(' ')
        return str(s, charset and self.clean_charset(charset) or 'us-ascii', errors='ignore').strip(' ')

    # Workaround for broken quoting in some MUAs (see below)
    _re_mailworkaround = re.compile('"(=\?[^\?]+\?[QB]\?[^\?]+\?=)"', re.IGNORECASE)

    def _decode_mime_header(self, hdr, email_workaround):
        """Decode one RFC 2047 header value to str, or None.

        NOTE(review): the *email_workaround* parameter is accepted but never
        read here — the quote workaround is applied unconditionally; confirm
        against callers whether that is intended.
        """
        if hdr is None:
            return None

        # Per http://bugs.python.org/issue504152 (and lots of testing), it seems
        # we must get rid of the sequence \n\t at least in the header. If we
        # do this *before* doing any MIME decoding, we should be safe against
        # anybody *actually* putting that sequence in the header (since we
        # won't match the encoded contents)
        hdr = hdr.replace("\n\t", " ")

        # In at least some cases, at least gmail (and possibly other MUAs)
        # incorrectly put double quotes in the name/email field even when
        # it's encoded. That's not allowed - they have to be escaped - but
        # since there's a fair amount of those, we apply a regex to get
        # rid of them.
        m = self._re_mailworkaround.search(hdr)
        if m:
            hdr = self._re_mailworkaround.sub(r'\1', hdr)

        try:
            return " ".join([self._maybe_decode(s, charset) for s, charset in decode_header(hdr)])
        except HeaderParseError as e:
            # Parser error is typically someone specifying an encoding,
            # but then not actually using that encoding. We'll do the best
            # we can, which is cut it down to ascii and ignore errors
            return str(hdr, 'us-ascii', errors='ignore').strip(' ')

    def decode_mime_header(self, hdr, email_workaround=False):
        """Public wrapper around _decode_mime_header: handles Header
        instances, strips NULs, maps decode failures to IgnorableException
        and never returns None (empty string instead)."""
        try:
            if isinstance(hdr, Header):
                hdr = hdr.encode()
            h = self._decode_mime_header(hdr, email_workaround)
            if h:
                return h.replace("\0", "")
            return ''
        except LookupError as e:
            raise IgnorableException("Failed to decode header value '%s': %s" % (hdr, e))
        except ValueError as ve:
            raise IgnorableException("Failed to decode header value '%s': %s" % (hdr, ve))

    def get_mandatory(self, fieldname):
        """Return header *fieldname*, raising IgnorableException when
        missing or None."""
        try:
            x = self.msg[fieldname]
            if x is None:
                raise Exception()
            return x
        except:
            raise IgnorableException("Mandatory field '%s' is missing" % fieldname)

    def get_optional(self, fieldname):
        """Return header *fieldname*, or '' when missing."""
        try:
            return self.msg[fieldname]
        except:
            return ''

    def html_clean(self, html):
        """Tidy *html* and strip it to text via HTMLCleaner; returns None
        on any tidy or parse failure."""
        # First we pass it through tidy
        (html, errors) = tidylib.tidy_document(html, options={
            'drop-proprietary-attributes': 1,
            'alt-text': '',
            'hide-comments': 1,
            'output-xhtml': 1,
            'show-body-only': 1,
            'clean': 1,
            'char-encoding': 'utf8',
            'show-warnings': 0,
            'show-info': 0,
        })
        if errors:
            print(("HTML tidy failed for %s!" % self.msgid))
            print(errors)
            return None

        try:
            cleaner = HTMLCleaner()
            cleaner.feed(html)
            return cleaner.get_text()
        except Exception as e:
            # Failed to parse the html, thus failed to clean it. so we must
            # give up...
            return None
def extractMetaData(self, obj):
    """Read RFC 822 headers from the binary stream *obj* and return the
    cleaned-up metadata dictionary."""
    message = BytesParser().parse(obj)
    raw_metadata = {key: value for key, value in message.items()}
    return self.cleanupMetaDataDict(raw_metadata)
def __init__(self):
    """Set up the message parser.

    compat32 is the legacy (Python 2 era) email policy; presumably chosen
    for backwards-compatible header handling — confirm before changing.
    """
    self.parser = BytesParser(policy=compat32)
from email import policy
from email.parser import BytesParser

# Read the full raw message from stdin before re-attaching the controlling
# tty, so later interactive prompts still work.
raw = sys.stdin.buffer.read()
if not os.isatty(0):
    # stdin was a pipe/redirect: reopen the terminal as fd 0.
    fd = os.open('/dev/tty', os.O_RDONLY)
    # NOTE(review): os.open raises OSError on failure rather than returning
    # a negative fd, so this branch looks unreachable — confirm.
    if fd < 0:
        sys.stderr.write('Unable to open an input tty.\n')
        sys.exit(-1)
    else:
        os.dup2(fd, 0)
        os.close(fd)

# policy.default gives the modern API (get_body, structured headers).
msg = BytesParser(policy=policy.default).parsebytes(raw)

# We can extract the richest alternative in order to display it:
richest = msg.get_body()
partfiles = {}
if richest['content-type'].maintype == 'text':
    if richest['content-type'].subtype == 'plain':
        # Plain text: just print it and we're done.
        for line in richest.get_content().splitlines():
            print(line)
        sys.exit()
    elif richest['content-type'].subtype == 'html':
        body = richest
    else:
        print("Don't know how to display {}".format(richest.get_content_type()))
        sys.exit()
# NOTE(review): SOURCE is truncated here — the multipart/related branch body
# is not visible in this chunk.
elif richest['content-type'].content_type == 'multipart/related':
# Login failed above (names `mail`, `logger`, `s`, `smtp_*` are defined
# earlier in the file, outside this chunk).
logger.exception('Wrong login/password!')
sys.exit()

# Getting all unseen mail
result, data = mail.search(None, 'unseen')
if len(data[0]) == 0:
    logger.info('No unseen mails!')
    logger.info(mail.logout())
    logger.info(s.quit())
else:
    ids = data[0].split()  # getting unseen letters id list
    msgs = []
    for x in ids:
        # Fetch the full raw message and parse it so headers can be edited.
        result, data = mail.fetch(x, 'RFC822')
        parser = BytesParser()
        msg = parser.parsebytes(data[0][1])
        # changing "To" and "From" fields in header
        msg.__delitem__('To')
        msg.__setitem__('To', smtp_send_to_header)
        msg.__delitem__('From')
        msg.__setitem__('From', smtp_login)
        msgs.append(msg)
    logger.info('Have %d new letters', len(ids))
    logger.info('Close imap protocol')
    logger.info(mail.logout())
    # sending messages
    i = 0
    # NOTE(review): SOURCE is truncated here — the send-loop body is not
    # visible in this chunk.
    for msg in msgs:
import os
import sys
import tempfile
import mimetypes
import webbrowser

# Import the email modules we'll need
from email import policy
from email.parser import BytesParser

# An imaginary module that would make this work and be safe.
from imaginary import magic_html_parser

# In a real program you'd get the filename from the arguments.
# FIX: the original left the file object unclosed (resource leak /
# ResourceWarning); a `with` block closes it deterministically.
with open('outgoing.msg', 'rb') as fp:
    msg = BytesParser(policy=policy.default).parse(fp)

# Now the header items can be accessed as a dictionary, and any non-ASCII will
# be converted to unicode:
print('To:', msg['to'])
print('From:', msg['from'])
print('Subject:', msg['subject'])

# If we want to print a preview of the message content, we can extract whatever
# the least formatted payload is and print the first three lines.  Of course,
# if the message has no plain text part printing the first three lines of html
# is probably useless, but this is just a conceptual example.
simplest = msg.get_body(preferencelist=('plain', 'html'))
print()
print(''.join(simplest.get_content().splitlines(keepends=True)[:3]))

ans = input("View full message?")
def parse(self, bytesfile):
    """Parse an email from *bytesfile* and return its fields as a dict.

    The dict carries the parsed message object plus decoded subject,
    date, body/html text, address lists and attachments.
    """
    message = Parser().parse(bytesfile)

    attachments = []
    body_parts = []
    html_parts = []
    self.parse_body(message.walk(), attachments, body_parts, html_parts)

    def addresses(header):
        # Decoded address list for one header name.
        return self.get_address_list(message.get_all(header, []))

    recipients = addresses("To")
    recipients.extend(addresses("X-Original-To"))

    sender_name, sender_addr = parseaddr(self.get(message.get("From")))
    sender = (self.parse_header_field(sender_name),
              sender_addr.lower() if sender_addr else sender_addr)

    return {
        "msgobj": message,
        "date": self.parse_date(self.get(message.get("Date"))),
        "subject": self.parse_header_field(message["Subject"]),
        "body": u"\n".join(body_parts),
        "html": u"\n".join(html_parts),
        "from": sender,
        "to": recipients,
        "cc": addresses("Cc"),
        "resent_to": addresses("resent-to"),
        "resent_cc": addresses("resent-cc"),
        "attachments": attachments,
    }