Exemplo n.º 1
0
    def test_remove_next_part_from_content(self):
        """Scrubbed content must not contain the "next part" separator.

        Parses the ``pipermail_nextpart.txt`` fixture, scrubs it and checks
        that the separator line is absent from the returned content.
        """
        with open(get_test_file("pipermail_nextpart.txt")) as email_file:
            msg = message_from_file(email_file)
        scrubber = Scrubber(msg)
        contents = scrubber.scrub()[0]

        # assertNotIn replaces the deprecated failIf alias (removed in
        # Python 3.12) and reports the offending content on failure.
        self.assertNotIn("-------------- next part --------------", contents)
Exemplo n.º 2
0
    def test_remove_next_part_from_content(self):
        """The "next part" separator must be stripped from scrubbed content.

        The ``pipermail_nextpart.txt`` fixture is parsed with the SMTP
        policy, scrubbed, and the returned content is checked.
        """
        with open(get_test_file("pipermail_nextpart.txt")) as email_file:
            msg = message_from_file(email_file, policy=policy.SMTP)
        scrubber = Scrubber(msg)
        contents = scrubber.scrub()[0]

        # assertNotIn shows the searched content when the check fails,
        # which assertFalse(... in ...) cannot do.
        self.assertNotIn("-------------- next part --------------", contents)
Exemplo n.º 3
0
 def test_html_only_email(self):
     """Scrubbing an HTML-only email must still return a ``str`` content.

     The ``html-email-2.txt`` fixture only has an HTML part, so the scrubbed
     content is expected to be empty; only its type is checked here.
     """
     with open(get_test_file("html-email-2.txt")) as email_file:
         msg = message_from_file(email_file, policy=policy.SMTP)
     scrubber = Scrubber(msg)
     contents = scrubber.scrub()[0]
     # assertIsInstance reports the actual type on failure, unlike
     # assertTrue(isinstance(...)).
     self.assertIsInstance(contents, str,
                           "Scrubbed content should always be unicode")
Exemplo n.º 4
0
 def test_name_unicode(self):
     """Every attachment name returned by the scrubber must be text.

     Runs over the ``attachment-1.txt`` to ``attachment-5.txt`` fixtures and
     checks the item at index 1 of each returned attachment.
     """
     # The ``unicode`` builtin only exists on Python 2; type(u"") is that
     # same type there and ``str`` on Python 3, so no NameError is raised.
     text_type = type(u"")
     for num in range(1, 6):
         with open(get_test_file("attachment-%d.txt" % num)) as email_file:
             msg = message_from_file(email_file)
         scrubber = Scrubber(msg)
         attachments = scrubber.scrub()[1]
         for attachment in attachments:
             name = attachment[1]
             self.assertIsInstance(
                 name, text_type, "attachment %r must be unicode" % name)
Exemplo n.º 5
0
 def _test_non_ascii_payload(self, enc):
     """Check that a non-ASCII payload is scrubbed into the expected text.

     ``enc`` selects the fixture: the file read is ``payload-<enc>.txt``,
     opened in binary mode and parsed with the SMTP policy.
     """
     with open(get_test_file("payload-%s.txt" % enc), 'rb') as email_file:
         msg = message_from_binary_file(email_file, policy=policy.SMTP)
     # Parsing is finished once message_from_binary_file returns, so the
     # file can be closed before the message is scrubbed.
     scrubber = Scrubber(msg)
     contents = scrubber.scrub()[0]
     # assertIsInstance reports the actual type on failure.
     self.assertIsInstance(contents, str)
     self.assertEqual(
         contents,
         'This message contains non-ascii characters:\n'
         '\xe9 \xe8 \xe7 \xe0 \xee \xef \xeb \u20ac\n')
Exemplo n.º 6
0
 def test_bad_content_type(self):
     """Scrubber must handle unknown content-types.

     Scrubs the ``payload-unknown.txt`` fixture and fails the test, after
     printing the traceback, if a LookupError escapes from the scrubber.
     """
     with open(get_test_file("payload-unknown.txt")) as email_file:
         msg = message_from_file(email_file)
     scrubber = Scrubber(msg)
     try:
         contents = scrubber.scrub()[0]
     # "except X as e" is valid on Python 2.6+ and Python 3, whereas the
     # old "except X, e" spelling is a syntax error on Python 3.
     except LookupError as e:
         import traceback
         print(traceback.format_exc())
         self.fail(e)  # codec not found
Exemplo n.º 7
0
 def test_non_ascii_payload(self):
     """Scrubber must handle non-ascii messages.

     For both the ``payload-utf8.txt`` and ``payload-iso8859.txt`` fixtures
     the scrubbed content must be text and equal to the same expected string.
     """
     # The ``unicode`` builtin only exists on Python 2; type(u"") is that
     # same type there and ``str`` on Python 3, so no NameError is raised.
     text_type = type(u"")
     for enc in ["utf8", "iso8859"]:
         with open(get_test_file("payload-%s.txt" % enc)) as email_file:
             msg = message_from_file(email_file)
         scrubber = Scrubber(msg)
         contents = scrubber.scrub()[0]
         self.assertIsInstance(contents, text_type)
         self.assertEqual(
             contents,
             u'This message contains non-ascii '
             u'characters:\n\xe9 \xe8 \xe7 \xe0 \xee \xef \xeb \u20ac\n')
Exemplo n.º 8
0
 def test_html_email_1(self):
     """Scrub ``html-email-1.txt``: one HTML attachment plus text content."""
     fixture_path = get_test_file("html-email-1.txt")
     with open(fixture_path) as fixture:
         parsed = message_from_file(fixture, policy=policy.SMTP)
     text, parts = Scrubber(parsed).scrub()
     # Exactly one attachment is expected: the HTML part.
     self.assertEqual(len(parts), 1)
     html_part = parts[0]
     self._check_html_attachment(
         html_part, (2, "attachment.html", "text/html", "iso-8859-1"))
     self.assertEqual(len(html_part[4]), 2688)
     # What is left as scrubbed content, non-ASCII characters included.
     expected_text = (
         "This is a test message\n"
         "Non-ASCII chars: r\xe9ponse fran\xe7ais \n\n\n")
     self.assertEqual(text, expected_text)
Exemplo n.º 9
0
 def test_attachment_1(self):
     """Scrub ``attachment-1.txt`` and check its vCard attachment and text."""
     with open(get_test_file("attachment-1.txt")) as email_file:
         msg = message_from_file(email_file, policy=policy.SMTP)
     scrubber = Scrubber(msg)
     contents, attachments = scrubber.scrub()
     self.assertEqual(len(attachments), 1)
     self.assertEqual(attachments[0],
                      (2, 'puntogil.vcf', 'text/x-vcard', "utf-8",
                       'begin:vcard\nfn:gil\nn:;gil\nversion:2.1\n'
                       'end:vcard\n\n'))
     # NOTE(review): the list-address line had been garbled into
     # "\[email protected]" (an invalid escape followed by an e-mail
     # obfuscation placeholder). It is restored here as a newline plus the
     # list address -- confirm against the attachment-1.txt fixture.
     self.assertEqual(
         contents, "This is a test message.\n\n"
         "\n-- \ndevel mailing list\ndevel@lists.fedoraproject.org\n"
         "https://admin.fedoraproject.org/mailman/listinfo/devel\n")
Exemplo n.º 10
0
 def test_attachment_5(self):
     """Scrub ``attachment-5.txt``: one text attachment plus text content."""
     fixture_path = get_test_file("attachment-5.txt")
     with open(fixture_path) as fixture:
         parsed = message_from_file(fixture, policy=policy.SMTP)
     text, parts = Scrubber(parsed).scrub()
     self.assertEqual(len(parts), 1)
     # The only attachment is a text file with an accented filename.
     text_part = parts[0]
     expected_header = (2, "todo-déjeuner.txt", "text/plain", "utf-8")
     self.assertEqual(text_part[0:4], expected_header)
     self.assertEqual(len(text_part[4]), 110)
     # What is left as scrubbed content.
     expected_text = (
         'This is a test, HTML message with '
         'accented letters : \xe9 \xe8 \xe7 \xe0.\nAnd an '
         'attachment with an accented filename\n\n\n\n\n\n')
     self.assertEqual(text, expected_text)
Exemplo n.º 11
0
 def test_attachment_3(self):
     """Scrub ``attachment-3.txt``: an HTML part and a JPEG attachment."""
     fixture_path = get_test_file("attachment-3.txt")
     with open(fixture_path) as fixture:
         parsed = message_from_file(fixture, policy=policy.SMTP)
     text, parts = Scrubber(parsed).scrub()
     self.assertEqual(len(parts), 2)
     html_part, image_part = parts
     # First attachment: the HTML part.
     self._check_html_attachment(
         html_part, (3, "attachment.html", "text/html", "iso-8859-1"))
     self.assertEqual(len(html_part[4]), 3114)
     # Second attachment: the image, which carries no charset.
     expected_image = (4, "GeoffreyRoucourt.jpg", "image/jpeg", None)
     self.assertEqual(image_part[0:4], expected_image)
     self.assertEqual(len(image_part[4]), 282180)
     # What is left as scrubbed content.
     self.assertEqual(text, "This is a test message\n\n\n")
Exemplo n.º 12
0
 def test_attachment_name_badly_encoded(self):
     """A badly encoded attachment filename must not break scrubbing."""
     fixture_path = get_test_file("email-bad-filename.txt")
     with open(fixture_path, 'rb') as fixture:
         parsed = message_from_binary_file(fixture, policy=policy.SMTP)
     cleaner = Scrubber(parsed)
     try:
         parts = cleaner.scrub()[1]
     except UnicodeDecodeError:
         print(format_exc())
         self.fail("Could not decode the filename")
     # The filename contains non-ascii characters but declares no encoding.
     # Python decodes it with its best guess (ascii) and replaces whatever
     # does not map to an ascii code point; the filename is then scrubbed
     # so that only alphanumerics, dash, underscore and dot remain.
     expected_parts = [
         (0, 'non-ascii-u3b5.jpg', 'text/plain', None, 'Dummy content\n'),
     ]
     self.assertEqual(parts, expected_parts)
Exemplo n.º 13
0
 def test_attachment_2(self):
     """Scrub ``attachment-2.txt`` and check its PGP signature and text."""
     with open(get_test_file("attachment-2.txt")) as email_file:
         msg = message_from_file(email_file)
     scrubber = Scrubber(msg)
     contents, attachments = scrubber.scrub()
     self.assertEqual(len(attachments), 1)
     self.assertEqual(attachments[0], (
             3, u'signature.asc', u'application/pgp-signature', None,
             '-----BEGIN PGP SIGNATURE-----\r\nVersion: GnuPG v1.4.12 '
             '(GNU/Linux)\r\nComment: Using GnuPG with Mozilla - '
             'http://www.enigmail.net/\r\n\r\niEYEARECAAYFAlBhm3oACgkQhmBj'
             'z394AnmMnQCcC+6tWcqE1dPQmIdRbLXgKGVp\r\nEeUAn2OqtaXaXaQV7rx+'
             'SmOldmSzcFw4\r\n=OEJv\r\n-----END PGP SIGNATURE-----\r\n'))
     # NOTE(review): the list-address line had been garbled into
     # "\[email protected]" (an invalid escape followed by an e-mail
     # obfuscation placeholder). It is restored here as a newline plus the
     # list address -- confirm against the attachment-2.txt fixture.
     self.assertEqual(
         contents,
         u"This is a test message\r\nNon-ascii chars: Hofm\xfchlgasse\r\n"
         u"\n-- \ndevel mailing list\ndevel@lists.fedoraproject.org\n"
         u"https://admin.fedoraproject.org/mailman/listinfo/devel\n"
         )
Exemplo n.º 14
0
 def test_attachment_4(self):
     """Scrub ``attachment-4.txt``: an HTML part and a text attachment."""
     fixture_path = get_test_file("attachment-4.txt")
     with open(fixture_path) as fixture:
         parsed = message_from_file(fixture, policy=policy.SMTP)
     text, parts = Scrubber(parsed).scrub()
     self.assertEqual(len(parts), 2)
     html_part, text_part = parts
     # First attachment: the HTML part.
     self._check_html_attachment(
         html_part, (3, "attachment.html", "text/html", "iso-8859-1"))
     self.assertEqual(len(html_part[4]), 113)
     # Second attachment: a text file with an accented filename.
     expected_header = (4, "todo-déjeuner.txt", "text/plain", "utf-8")
     self.assertEqual(text_part[0:4], expected_header)
     self.assertEqual(len(text_part[4]), 110)
     # What is left as scrubbed content.
     expected_text = (
         'This is a test, HTML message with '
         'accented letters : \xe9 \xe8 \xe7 \xe0.\nAnd an '
         'attachment with an accented filename\n\n\n\n\n')
     self.assertEqual(text, expected_text)