def get_main_content(self, msg: email.message.EmailMessage):
    """Return the mail body together with its content type and charset.

    The body part chosen by ``msg.get_body()`` is used when it can be read.
    If anything on that path raises, the error is printed and the raw,
    undecoded payload of a ``text/plain`` part is returned instead (when
    several such parts exist, the last one visited by ``walk()`` wins).
    If no ``text/plain`` part exists either, placeholder strings are
    returned.

    Returns:
        A ``(main_content, format_, charset)`` tuple.
    """
    try:
        preferred = msg.get_body()
        return (
            preferred.get_content(),
            preferred.get_content_type(),
            preferred.get_content_charset(),
        )
    except Exception as error:
        print(error)
        result = ('解析失敗', '不明', '不明')
        # Per the original author's note: get_body() fails for mails whose
        # character-set declaration is broken, so hand back the text part
        # as-is without decoding it.
        for candidate in msg.walk():
            if candidate.get_content_type() != 'text/plain':
                continue
            kind = candidate.get_content_type()
            raw_text = str(candidate.get_payload())
            result = (raw_text, kind, candidate.get_content_charset())
        return result
async def handle_message(self, message: email.message.EmailMessage):
    """Forward ``message`` via ``aiosmtplib`` and log who it was sent to.

    A truthy ``Remove-List-Unsubscribe`` header marks the mail as
    transactional: in that case both that header and ``List-Unsubscribe``
    are removed before sending.  The ``X-Peer``, ``X-MailFrom`` and
    ``X-RcptTo`` headers are always removed (presumably they are added by
    the receiving side of this relay -- confirm against the caller).

    When ``LOG_CONTENT`` is truthy, the payload of every ``text/plain`` and
    ``text/html`` part is logged as well.
    """
    is_transactional = bool(message.get("Remove-List-Unsubscribe", None))
    if is_transactional:
        del message["List-Unsubscribe"]
        del message["Remove-List-Unsubscribe"]

    for relay_header in ('X-Peer', 'X-MailFrom', 'X-RcptTo'):
        del message[relay_header]

    await aiosmtplib.send(message, hostname=SMTP_HOST, port=SMTP_PORT)

    recipients = ",".join(message.get_all("to", []))
    if is_transactional:
        log.info(f'Transactional Mail has been sent to {recipients}')
    else:
        log.info(f'Non-Transactional Mail has been sent to {recipients}')

    if not LOG_CONTENT:
        return

    # walk() visits the message tree depth-first: every node is either a
    # leaf part or another multipart container holding further parts.
    for node in message.walk():
        if node.get_content_type() in ('text/plain', 'text/html'):
            log.info(node.get_payload())
def _convert_message_part_to_str(message_part: email.message.EmailMessage) -> Tuple[str, bool]:
    """Turn the content of one MIME part into a string.

    :param message_part: The MIME part to convert.
    :return: ``(text, is_base64)``. Content that is already a ``str`` is
        returned unchanged, and ``application/xml`` bytes are decoded to
        text; both give ``is_base64 == False``. Any other bytes content is
        base64-encoded and returned with ``is_base64 == True``.
    :raises ebxml_envelope.EbXmlParsingError: if a ``UnicodeDecodeError``
        occurs while converting bytes content.
    """
    content: Union[str, bytes] = message_part.get_content()
    content_type = message_part.get_content_type()
    content_transfer_encoding = message_part['Content-Transfer-Encoding']
    log_fields = {
        'ContentType': content_type,
        'ContentTransferEncoding': content_transfer_encoding,
    }

    # Text content needs no conversion at all.
    if isinstance(content, str):
        logger.info('Successfully decoded message part with {ContentType} {ContentTransferEncoding} as string',
                    fparams=log_fields)
        return content, False

    try:
        if content_type != 'application/xml':
            # Arbitrary binary: make it string-safe via base64.
            as_base64 = base64.b64encode(content).decode()
            logger.info('Successfully encoded binary message part with {ContentType} '
                        '{ContentTransferEncoding} as a base64 string', fparams=log_fields)
            return as_base64, True

        as_text = content.decode()
        logger.info('Successfully decoded message part with {ContentType} {ContentTransferEncoding} as a string',
                    fparams=log_fields)
        return as_text, False
    except UnicodeDecodeError as e:
        logger.error('Failed to decode ebXML message part with {ContentType} {ContentTransferEncoding}.',
                     fparams=log_fields)
        raise ebxml_envelope.EbXmlParsingError(
            f'Failed to decode ebXML message part with Content-Type: {content_type} and '
            f'Content-Transfer-Encoding: {content_transfer_encoding}') from e
def _extract_message_parts(msg: email.message.EmailMessage) -> Tuple[str, str, List[Dict[str, Union[str, bool]]]]:
    """Split a multipart message into its ebXML part, payload part and attachments.

    Per the original comment, EIS section 2.5.4 requires the first MIME part
    to hold the ebXML SOAP message and the payload (when present) to be the
    first additional part; everything from the third part on is treated as
    an extra attachment.

    :param msg: The message to extract parts from.
    :return: ``(ebxml_part, payload_part, attachments)``. ``payload_part``
        is ``None`` and ``attachments`` is empty when the message has only
        one part.
    :raises ebxml_envelope.EbXmlParsingError: if ``msg`` is not multipart.
    """
    if not msg.is_multipart():
        logger.error('Non-multipart message received')
        raise ebxml_envelope.EbXmlParsingError("Non-multipart message received")

    message_parts: Sequence[email.message.EmailMessage] = tuple(msg.iter_parts())
    EbxmlRequestEnvelope._report_any_defects_in_message_parts(message_parts)

    # Part 1: the ebXML envelope.
    ebxml_part = EbxmlRequestEnvelope._extract_ebxml_part(message_parts[0])

    if len(message_parts) <= 1:
        return ebxml_part, None, []

    # Part 2: the HL7 payload.  Parts 3..n: additional attachments.
    payload_part = EbxmlRequestEnvelope._extract_hl7_payload_part(message_parts[1])
    attachments = list(EbxmlRequestEnvelope._extract_additional_attachments_parts(message_parts[2:]))
    return ebxml_part, payload_part, attachments
def _get_payload(self, message: email.message.EmailMessage) -> str: """ Get the body of the email. Note: MODIFIES message fetched to be seen. """ if message.is_multipart(): return message.get_payload(0).get_payload() else: return message.get_payload(0)
def get_attachments(self, msg: email.message.EmailMessage, b_num: bytes):
    """Write the named attachments of ``msg`` to disk and describe them.

    Files are stored under ``./tmp/<mail number>/``, where the mail number
    is ``b_num`` decoded as UTF-8.  Attachments without a file name are
    skipped.  A failure on one attachment is printed and does not stop the
    remaining ones from being processed.

    Returns:
        A list of ``{'file_path': <absolute path>, 'file_name': <name>}``
        dicts, one per file written.
    """
    files = []
    for part in msg.iter_attachments():
        try:
            filename = part.get_filename()
            if not filename:
                continue

            # The name comes straight from the mail, i.e. from an untrusted
            # sender.  Keep only its last path component so that values like
            # "../../x" or "/etc/x" cannot escape the per-mail directory.
            safe_name = os.path.basename(filename.replace('\\', '/'))
            if safe_name in ('', '.', '..'):
                continue

            payload = part.get_payload(decode=True)
            if payload is None:
                # get_payload(decode=True) returns None for multipart
                # attachments; there are no bytes to write.
                continue

            # One directory per mail number.
            new_dir = os.path.join('./tmp', b_num.decode('utf-8'))
            os.makedirs(new_dir, exist_ok=True)

            file_path = os.path.join(new_dir, safe_name)
            with open(file_path, 'wb') as fp:
                fp.write(payload)

            files.append({'file_path': os.path.abspath(file_path),
                          'file_name': safe_name})
        except Exception as error:
            # Best effort: report and move on to the next attachment.
            print(error)
    return files
def filter_email(eml: email.message.EmailMessage) -> str:
    """Download the page behind the first link in the mail's plain-text body.

    Args:
        eml: The parsed email to inspect.

    Returns:
        The downloaded page, parsed with BeautifulSoup (``lxml``) and
        rendered with ``prettify()``.

    Raises:
        EmailParseException: The mail has no ``text/plain`` body.
        LinkNotFoundException: The body contains no link.
    """
    # get_body() expects a sequence of subtypes, not a bare string.
    body = eml.get_body(("plain",))
    if body is None:
        raise EmailParseException()

    # Find a link in the body
    link_re = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"
    first_link = re.search(link_re, body.get_content())
    if first_link is None:
        raise LinkNotFoundException()
    url: str = first_link.group(1)

    # The pattern also matches scheme-less links such as "www.example.com/x",
    # which urllib rejects with "unknown url type"; default those to http.
    if not url.lower().startswith(("http://", "https://")):
        url = "http://" + url

    # Some sites require this header
    headers = {
        'Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
    }

    # NOTE(security): the URL is taken from mail content, i.e. from an
    # untrusted sender, and is fetched as-is.  Consider restricting the
    # allowed hosts if this runs inside a private network.
    request = urllib.request.Request(url, headers=headers)
    with urllib.request.urlopen(request) as response:
        webpage = response.read()

    # Parse with BeautifulSoup
    bs = BeautifulSoup(webpage, features='lxml')
    return bs.prettify()
def get_header_text(self, msg: email.message.EmailMessage):
    """Return all headers of ``msg`` as one string.

    Each header becomes a ``Name: value`` line terminated by a newline, in
    the order reported by ``msg.items()``.
    """
    header_lines = ['{}: {}\n'.format(name, content) for name, content in msg.items()]
    return ''.join(header_lines)
def _get_body(msg: email.message.EmailMessage) -> Optional[str]: candidate = None for part in msg.walk(): if part.get_content_type() == "text/plain" and not part.is_multipart() and part.get( "Content-Disposition") is None: if not candidate: candidate = part.get_payload(decode=True).decode("utf-8") else: raise Exception("two possible candidates!") return candidate
def _store_fault(item: Item, number: int, mail: email.message.EmailMessage) -> str:
    """Dump ``mail`` to an ``.eml`` file and return an error description.

    The file is written to ``<pmlib.config.target_path>/<item.name>_<number>.eml``
    (normalised to an absolute path) as UTF-8 text.

    Returns:
        The text ``"Unable to decode mail <number> in <item.name>"``.
    """
    filename = "{0:s}/{1:s}_{2:d}.eml".format(pmlib.config.target_path,
                                              item.name, number)
    filename = os.path.abspath(os.path.normpath(filename))
    error_text = "Unable to decode mail {0:d} in {1:s}".format(
        number, item.name)
    # Use a context manager so the handle is closed even if serialising or
    # writing the mail raises.
    with open(filename, mode="w", encoding='utf-8') as f:
        f.write(mail.as_string())
    return error_text
def _header_or_none(msg: email.message.EmailMessage, name: str):
    """Return header ``name`` of ``msg``, or ``None`` if reading it raises."""
    try:
        return msg.get(name)
    except Exception:
        return None


def process_message(uid, msg: email.message.EmailMessage):
    """Summarise one message as a dict of headers, dates and body links.

    Every step is best-effort: a failure while reading a header, converting
    the date or extracting links leaves the corresponding field at its
    empty value instead of raising.

    Returns:
        A dict with the keys ``uid``, ``to``, ``from``, ``subject``,
        ``raw_date``, ``local_date`` (naive local ``datetime`` or ``None``),
        ``message-id``, ``links`` and ``num-links``.
    """
    raw_date = None
    local_date = None
    try:
        raw_date = msg.get('Date')
        # Now convert to local date-time
        date_tuple = email.utils.parsedate_tz(raw_date)
        if date_tuple:
            local_date = datetime.datetime.fromtimestamp(
                email.utils.mktime_tz(date_tuple))
    except Exception:
        # Kept from the original: a failed conversion discards the raw
        # value as well.
        raw_date = None
        local_date = None

    links = []
    try:
        body = msg.get_body(('html', 'plain'))
        # Test for None explicitly: a message object is falsy when it has
        # no headers, which would wrongly skip a header-less body part.
        if body is not None:
            if body.get_content_type() == 'text/plain':
                links = links_from_plaintext(body.get_content())
            elif body.get_content_type() == 'text/html':
                links = links_from_html(body.get_content())
    except Exception:
        # Deliberately best-effort: an unparsable body yields no links.
        pass

    info = {
        'uid': uid,
        'to': _header_or_none(msg, 'To'),
        'from': _header_or_none(msg, 'From'),
        'subject': _header_or_none(msg, 'Subject'),
        'raw_date': raw_date,
        'local_date': local_date,
        'message-id': _header_or_none(msg, 'Message-ID'),
        'links': links,
        'num-links': len(links)
    }
    return info
def email_message_to_plain(em: email.message.EmailMessage) -> str:
    """Return the plain text of the first text part found in ``em``.

    Parts are visited with ``walk()``; the first ``text/plain`` or
    ``text/html`` part decides the result.  If ``get_content()`` fails for
    that part, the stringified raw payload is used instead.

    @ param em: the EmailMessage object
    @ return: the content of a ``text/plain`` part (``"empty"`` when that
        content is ``None``), or the result of ``Formatter.html_to_plain``
        for a ``text/html`` part.  ``None`` when no text part exists.
    """
    for node in em.walk():
        kind = node.get_content_type()
        is_plain = kind == 'text/plain'
        if not is_plain and kind != 'text/html':
            continue

        try:
            text = node.get_content()
        except Exception:
            text = str(node.get_payload())

        if not is_plain:
            return Formatter.html_to_plain(text)
        return "empty" if text is None else text
    return None