def test_remove_next_part_from_content(self):
    """The "next part" marker line must not survive scrubbing.

    Parses the ``pipermail_nextpart.txt`` fixture, scrubs it and checks
    that the marker string is absent from the scrubbed content.
    """
    with open(get_test_file("pipermail_nextpart.txt")) as email_file:
        msg = message_from_file(email_file)
    scrubber = Scrubber(msg)
    contents = scrubber.scrub()[0]
    # assertNotIn replaces the deprecated ``failIf`` alias (removed in
    # Python 3.12) and shows both operands when the check fails.
    self.assertNotIn("-------------- next part --------------", contents)
def test_remove_next_part_from_content(self):
    """The "next part" marker line must not survive scrubbing.

    Parses the ``pipermail_nextpart.txt`` fixture with the SMTP policy,
    scrubs it and checks that the marker string is absent from the
    scrubbed content.
    """
    with open(get_test_file("pipermail_nextpart.txt")) as email_file:
        msg = message_from_file(email_file, policy=policy.SMTP)
    scrubber = Scrubber(msg)
    contents = scrubber.scrub()[0]
    # assertNotIn shows both operands on failure, unlike
    # assertFalse(x in y) which only reports "True is not false".
    self.assertNotIn("-------------- next part --------------", contents)
def test_html_only_email(self):
    """Scrubbed content of an HTML-only message must still be a ``str``."""
    # This email only has an HTML part, thus the scrubbed content will be
    # empty. Even then it must be a text (unicode) string.
    with open(get_test_file("html-email-2.txt")) as email_file:
        msg = message_from_file(email_file, policy=policy.SMTP)
    scrubber = Scrubber(msg)
    contents = scrubber.scrub()[0]
    # assertIsInstance reports the actual type when the check fails.
    self.assertIsInstance(
        contents, str, "Scrubbed content should always be unicode")
def test_name_unicode(self):
    """Attachment names in fixtures attachment-1..5 must be text strings."""
    for num in range(1, 6):
        with open(get_test_file("attachment-%d.txt" % num)) as email_file:
            msg = message_from_file(email_file)
        scrubber = Scrubber(msg)
        # Index 1 of the scrub() result holds the attachments.
        attachments = scrubber.scrub()[1]
        for attachment in attachments:
            # Index 1 of each attachment entry is its name.
            name = attachment[1]
            # ``unicode`` does not exist on Python 3 (NameError); the text
            # type there is ``str``.
            self.assertIsInstance(
                name, str, "attachment %r must be unicode" % name)
def _test_non_ascii_payload(self, enc):
    """Check that the ``payload-<enc>.txt`` fixture scrubs to the known text.

    ``enc`` is the suffix interpolated into ``"payload-%s.txt"`` to pick
    the fixture file. The file is read in binary mode and parsed with the
    SMTP policy.
    """
    with open(get_test_file("payload-%s.txt" % enc), 'rb') as email_file:
        msg = message_from_binary_file(email_file, policy=policy.SMTP)
    scrubber = Scrubber(msg)
    contents = scrubber.scrub()[0]
    # assertIsInstance reports the actual type when the check fails.
    self.assertIsInstance(contents, str)
    self.assertEqual(
        contents,
        'This message contains non-ascii characters:\n\xe9 \xe8 \xe7 \xe0 \xee \xef \xeb \u20ac\n'  # noqa
    )
def test_bad_content_type(self):
    """Scrubber must handle unknown content-types"""
    with open(get_test_file("payload-unknown.txt")) as email_file:
        msg = message_from_file(email_file)
    scrubber = Scrubber(msg)
    try:
        # Only the absence of a LookupError matters; the result is unused.
        scrubber.scrub()
    except LookupError as e:
        # ``except X, e`` is a SyntaxError on Python 3; ``as`` works on
        # Python 2.6+ and 3.
        import traceback
        print(traceback.format_exc())
        self.fail(e)  # codec not found
def test_non_ascii_payload(self):
    """Scrubber must handle non-ascii messages"""
    for enc in ["utf8", "iso8859"]:
        # NOTE(review): the fixture is opened in text mode, so decoding
        # relies on the platform default encoding -- confirm this is
        # intended for the iso8859 fixture.
        with open(get_test_file("payload-%s.txt" % enc)) as email_file:
            msg = message_from_file(email_file)
        scrubber = Scrubber(msg)
        contents = scrubber.scrub()[0]
        # ``unicode`` does not exist on Python 3 (NameError); the text
        # type there is ``str``.
        self.assertIsInstance(contents, str)
        self.assertEqual(
            contents,
            'This message contains non-ascii '
            'characters:\n\xe9 \xe8 \xe7 \xe0 \xee \xef \xeb \u20ac\n')
def test_html_email_1(self):
    """Scrub html-email-1.txt: one HTML attachment plus the text content."""
    fixture_path = get_test_file("html-email-1.txt")
    with open(fixture_path) as fixture:
        message = message_from_file(fixture, policy=policy.SMTP)
    text, parts = Scrubber(message).scrub()

    # Exactly one attachment is expected: the HTML part.
    self.assertEqual(len(parts), 1)
    html_part = parts[0]
    self._check_html_attachment(
        html_part, (2, "attachment.html", "text/html", "iso-8859-1"))
    # Index 4 of an attachment entry is compared by length only.
    self.assertEqual(len(html_part[4]), 2688)

    # Scrubbed content.
    expected_text = (
        "This is a test message\n"
        "Non-ASCII chars: r\xe9ponse fran\xe7ais \n\n\n"
    )
    self.assertEqual(text, expected_text)
def test_attachment_1(self):
    """Scrub attachment-1.txt: a single vCard attachment plus the content."""
    with open(get_test_file("attachment-1.txt")) as fixture:
        message = message_from_file(fixture, policy=policy.SMTP)
    text, parts = Scrubber(message).scrub()

    self.assertEqual(len(parts), 1)
    expected_vcard = (
        2, 'puntogil.vcf', 'text/x-vcard', "utf-8",
        'begin:vcard\nfn:gil\nn:;gil\nversion:2.1\n'
        'end:vcard\n\n',
    )
    self.assertEqual(parts[0], expected_vcard)

    # NOTE(review): "[email protected]" in the literal below looks like an
    # e-mail obfuscation artifact -- confirm against the fixture.
    expected_text = (
        "This is a test message.\n\n"
        "\n-- \ndevel mailing list\[email protected]\n"
        "https://admin.fedoraproject.org/mailman/listinfo/devel\n"
    )
    self.assertEqual(text, expected_text)
def test_attachment_5(self):
    """Scrub attachment-5.txt: one text attachment with an accented name."""
    with open(get_test_file("attachment-5.txt")) as fixture:
        message = message_from_file(fixture, policy=policy.SMTP)
    text, parts = Scrubber(message).scrub()

    self.assertEqual(len(parts), 1)
    # Text attachment: compare the first four fields, then the length of
    # the fifth.
    text_part = parts[0]
    self.assertEqual(
        text_part[0:4],
        (2, "todo-déjeuner.txt", "text/plain", "utf-8"))
    self.assertEqual(len(text_part[4]), 110)

    # Scrubbed content.
    expected_text = (
        'This is a test, HTML message with '
        'accented letters : \xe9 \xe8 \xe7 \xe0.\nAnd an '
        'attachment with an accented filename\n\n\n\n\n\n'
    )
    self.assertEqual(text, expected_text)
def test_attachment_3(self):
    """Scrub attachment-3.txt: an HTML part and a JPEG image attachment."""
    with open(get_test_file("attachment-3.txt")) as fixture:
        message = message_from_file(fixture, policy=policy.SMTP)
    text, parts = Scrubber(message).scrub()

    self.assertEqual(len(parts), 2)

    # HTML part.
    html_part = parts[0]
    self._check_html_attachment(
        html_part, (3, "attachment.html", "text/html", "iso-8859-1"))
    self.assertEqual(len(html_part[4]), 3114)

    # Image attachment: first four fields, then the length of the fifth.
    image_part = parts[1]
    self.assertEqual(
        image_part[0:4],
        (4, "GeoffreyRoucourt.jpg", "image/jpeg", None))
    self.assertEqual(len(image_part[4]), 282180)

    # Scrubbed content.
    self.assertEqual(text, "This is a test message\n\n\n")
def test_attachment_name_badly_encoded(self):
    """A filename with undeclared non-ascii characters must still scrub."""
    fixture_path = get_test_file("email-bad-filename.txt")
    with open(fixture_path, 'rb') as fixture:
        message = message_from_binary_file(fixture, policy=policy.SMTP)
    scrubbed = Scrubber(message)
    try:
        parts = scrubbed.scrub()[1]
    except UnicodeDecodeError:
        print(format_exc())
        self.fail("Could not decode the filename")
    # The filename has non-ascii characters without the encoding specified.
    # Python will try to decode the name with a best guess (ascii) and
    # replace the characters that don't correspond to an ascii code point.
    # Then the filename is scrubbed to allow only alphanumerics with dash,
    # underscore and dot.
    expected_parts = [
        (0, 'non-ascii-u3b5.jpg', 'text/plain', None, 'Dummy content\n'),
    ]
    self.assertEqual(parts, expected_parts)
def test_attachment_2(self):
    """Scrub attachment-2.txt: a PGP signature attachment plus the content."""
    with open(get_test_file("attachment-2.txt")) as fixture:
        message = message_from_file(fixture)
    text, parts = Scrubber(message).scrub()

    self.assertEqual(len(parts), 1)
    expected_signature = (
        3, u'signature.asc', u'application/pgp-signature', None,
        '-----BEGIN PGP SIGNATURE-----\r\nVersion: GnuPG v1.4.12 '
        '(GNU/Linux)\r\nComment: Using GnuPG with Mozilla - '
        'http://www.enigmail.net/\r\n\r\niEYEARECAAYFAlBhm3oACgkQhmBj'
        'z394AnmMnQCcC+6tWcqE1dPQmIdRbLXgKGVp\r\nEeUAn2OqtaXaXaQV7rx+'
        'SmOldmSzcFw4\r\n=OEJv\r\n-----END PGP SIGNATURE-----\r\n',
    )
    self.assertEqual(parts[0], expected_signature)

    # NOTE(review): "[email protected]" in the literal below looks like an
    # e-mail obfuscation artifact -- confirm against the fixture.
    expected_text = (
        u"This is a test message\r\nNon-ascii chars: Hofm\xfchlgasse\r\n"
        u"\n-- \ndevel mailing list\[email protected]\n"
        u"https://admin.fedoraproject.org/mailman/listinfo/devel\n"
    )
    self.assertEqual(text, expected_text)
def test_attachment_4(self):
    """Scrub attachment-4.txt: an HTML part and an accented-name text file."""
    with open(get_test_file("attachment-4.txt")) as fixture:
        message = message_from_file(fixture, policy=policy.SMTP)
    text, parts = Scrubber(message).scrub()

    self.assertEqual(len(parts), 2)

    # HTML part.
    html_part = parts[0]
    self._check_html_attachment(
        html_part, (3, "attachment.html", "text/html", "iso-8859-1"))
    self.assertEqual(len(html_part[4]), 113)

    # Text attachment: first four fields, then the length of the fifth.
    text_part = parts[1]
    self.assertEqual(
        text_part[0:4],
        (4, "todo-déjeuner.txt", "text/plain", "utf-8"))
    self.assertEqual(len(text_part[4]), 110)

    # Scrubbed content.
    expected_text = (
        'This is a test, HTML message with '
        'accented letters : \xe9 \xe8 \xe7 \xe0.\nAnd an '
        'attachment with an accented filename\n\n\n\n\n'
    )
    self.assertEqual(text, expected_text)