def _update_mime_text_info(self, msg, payload, part, text): charset = part.get_charset() text_count = self.get_local(msg, "mime_body_text_count") self.set_local(msg, "mime_body_text_count", text_count + 1) if part.get_content_subtype() == "plain": plain_characters_count = self.get_local(msg, "plain_characters_count") self.set_local(msg, "plain_characters_count", plain_characters_count + len(text)) ascii_count = self.get_local(msg, "ascii_count") ascii_count += len(text) self.set_local(msg, "ascii_count", ascii_count) unicode_chars = Regex(r"(&\#x[0-9A-F]{4};)", re.X).search(text) unicode_count = 0 if unicode_chars: unicode_count = self.get_local(msg, "unicode_count") unicode_count += len(unicode_chars.groups()) self.set_local(msg, "unicode_count", unicode_count) # XXX This does not work properly anymore if not charset or charset == r"us-ascii": try: payload.encode("ascii") except (UnicodeEncodeError, UnicodeDecodeError): self.set_local(msg, "mime_ascii_text_illegal", True) if len(re.split("--", msg.raw_msg)) <= 4: self.set_local(msg, "mime_missing_boundary", True)
def _update_quopri_stats(self, msg, part): max_line_len = 79 qp_count = self.get_local(msg, "mime_qp_count") qp_bytes = self.get_local(msg, "qp_bytes") qp_chars = self.get_local(msg, "qp_chars") quoted_printables = Regex(r"=(?:09|3[0-9ABCEF]|[2456][0-9A-F]|7[" r"0-9A-E])").search(part.get_payload()) qp_bytes += len(part.get_payload()) self.set_local(msg, "qp_bytes", qp_bytes) if quoted_printables: qp_chars += len(quoted_printables.groups()) self.set_local(msg, "qp_chars", qp_chars) self.set_local(msg, "mime_qp_count", qp_count + 1) raw = msg.translate_line_breaks(part.as_string()) has_long_line = self.get_local(msg, "mime_qp_long_line") if not has_long_line: has_long_line = any( len("".join(line.split(":")[1:])) > max_line_len and not line.startswith("SPAM") for line in raw.splitlines()) self.set_local(msg, "mime_qp_long_line", has_long_line)