Пример #1
0
    def test_body_props_charsets(self):
        # Each alternative part declares its own non-UTF-8 charset and is sent
        # quoted-printable; .text/.html and the per-part accessors must decode
        # using the charset declared on that part.
        expected_text = "Detta är det vanliga innehållet"
        expected_html = "<p>HTML זהו תוכן</p>"
        text_8859_10 = expected_text.encode("ISO-8859-10")
        html_8859_8 = expected_html.encode("ISO-8859-8")

        # quopri yields bytes; the encoded form is pure ASCII, so it can be
        # spliced into the text template below
        qp_text = quopri.encodestring(text_8859_10).decode("ASCII")
        qp_html = quopri.encodestring(html_8859_8).decode("ASCII")
        mime_template = dedent("""\
            MIME-Version: 1.0
            Subject: Charset test
            Content-Type: multipart/alternative; boundary="this_is_a_boundary"

            --this_is_a_boundary
            Content-Type: text/plain; charset=ISO-8859-10
            Content-Transfer-Encoding: QUOTED-PRINTABLE

            {text}
            --this_is_a_boundary
            Content-Type: text/html; charset=ISO-8859-8
            Content-Transfer-Encoding: QUOTED-PRINTABLE

            {html}
            --this_is_a_boundary--
            """)
        raw = mime_template.format(text=qp_text, html=qp_html)

        msg = AnymailInboundMessage.parse_raw_mime(raw)
        self.assertEqual(msg.defects, [])
        self.assertEqual(msg.text, expected_text)
        self.assertEqual(msg.html, expected_html)

        # Per-part access: raw bytes stay in the declared charset, text is decoded
        text_part = msg.get_payload(0)
        self.assertEqual(text_part.get_content_bytes(), text_8859_10)
        self.assertEqual(text_part.get_content_text(), expected_text)
        html_part = msg.get_payload(1)
        self.assertEqual(html_part.get_content_bytes(), html_8859_8)
        self.assertEqual(html_part.get_content_text(), expected_html)
Пример #2
0
    def test_attachment_as_uploaded_file_security(self):
        # Attachment filenames come straight from the sender and can be hostile;
        # the Django uploaded-file conversion is expected to sanitize them
        # (as much as any uploaded filename).
        raw_mime = dedent("""\
            MIME-Version: 1.0
            Subject: Attachment test
            Content-Type: multipart/mixed; boundary="this_is_a_boundary"

            --this_is_a_boundary
            Content-Type: text/plain; charset="UTF-8"

            The malicious attachment filenames below need to get sanitized

            --this_is_a_boundary
            Content-Type: text/plain; name="report.txt"
            Content-Disposition: attachment; filename="/etc/passwd"

            # (not that overwriting /etc/passwd is actually a thing
            # anymore, but you get the point)
            --this_is_a_boundary
            Content-Type: text/html
            Content-Disposition: attachment; filename="../static/index.html"

            <body>Hey, did I overwrite your site?</body>
            --this_is_a_boundary--
            """)
        parts = AnymailInboundMessage.parse_raw_mime(raw_mime).attachments

        # (filename as declared in the message, name expected after conversion),
        # listed in message order
        expected_names = [
            ("/etc/passwd", "passwd"),  # absolute path: directory removed
            ("../static/index.html", "index.html"),  # relative path: ditto
        ]
        for position, (declared, sanitized) in enumerate(expected_names):
            part = parts[position]
            # get_filename() still reports the raw name -- you wouldn't want to write there
            self.assertEqual(part.get_filename(), declared)
            self.assertEqual(part.as_uploaded_file().name, sanitized)
Пример #3
0
    def test_missing_or_invalid_charsets(self):
        """get_content_text offers charset/errors options for missing or invalid charset declarations"""
        raw_mime = dedent("""\
            Subject: Oops, missing charset declaration
            Content-Type: text/plain
            Content-Transfer-Encoding: quoted-printable

            Algunos programas de correo electr=f3nico est=e1n rotos
            """)
        parsed = AnymailInboundMessage.parse_raw_mime(raw_mime)
        self.assertEqual(parsed.defects, [])

        # With no arguments: charset from Content-Type (or 'utf-8' if missing) and
        # errors='replace'. The .text property uses the same defaults.
        replaced = "Algunos programas de correo electr�nico est�n rotos\n"
        self.assertEqual(parsed.get_content_text(), replaced)
        self.assertEqual(parsed.text, replaced)

        # A caller who knows the headers are wrong/missing can name the charset
        latin1_text = parsed.get_content_text(charset='ISO-8859-1')
        self.assertEqual(latin1_text, "Algunos programas de correo electrónico están rotos\n")

        # Error handling is selectable too
        with self.assertRaises(UnicodeDecodeError):
            parsed.get_content_text(errors='strict')
        ignored_text = parsed.get_content_text(errors='ignore')
        self.assertEqual(ignored_text, "Algunos programas de correo electrnico estn rotos\n")
Пример #4
0
    def test_parse_encoded_params(self):
        # Extended parameter syntax (name*=charset''percent-encoded-value) on the
        # attachment's Content-Type and Content-Disposition headers
        raw_mime = dedent("""\
            MIME-Version: 1.0
            Content-Type: multipart/mixed; boundary="this_is_a_boundary"

            --this_is_a_boundary
            Content-Type: text/plain; charset="UTF-8"

            This is the body

            --this_is_a_boundary
            Content-Type: text/plain; name*=us-ascii''TPS%20Report
            Content-Disposition: attachment;
             filename*=iso-8859-1''Une%20pi%E8ce%20jointe%2Etxt

            This is an attachment
            --this_is_a_boundary--
            """)
        attachment = AnymailInboundMessage.parse_raw_mime(raw_mime).attachments[0]
        self.assertTrue(attachment.is_attachment())
        self.assertEqual(attachment.get_content_disposition(), "attachment")

        # The raw param value is passed through collapse_rfc2231_value to get plain text
        name_param = attachment.get_param("Name", header="Content-Type")
        self.assertEqual(collapse_rfc2231_value(name_param), "TPS Report")
        self.assertEqual(attachment.get_filename(), "Une pièce jointe.txt")
Пример #5
0
    def test_parse_folded_headers(self):
        # Folded (continued) header lines must be unfolded into a single value,
        # while an indented line in the body is left exactly as written.
        template = dedent("""\
            Content-Type: text/plain
            Subject: This subject uses
             header folding
            X-Json: {"problematic":
             ["encoded newline\\n",
             "comma,semi;no space"]}

            Not-A-Header: This is the body.
             It is not folded.
            """)
        expected_subject = "This subject uses header folding"
        expected_json = '{"problematic": ["encoded newline\\n", "comma,semi;no space"]}'

        # check NL, CR, and CRNL line-endings
        for line_ending in ('\n', '\r', '\r\n'):
            raw_mime = template.replace('\n', line_ending)
            parsed = AnymailInboundMessage.parse_raw_mime(raw_mime)
            self.assertEqual(parsed['Subject'], expected_subject)
            self.assertEqual(parsed["X-Json"], expected_json)
            # the body keeps whichever line ending the message was built with
            expected_body = "Not-A-Header: This is the body.{end} It is not folded.{end}".format(
                end=line_ending)
            self.assertEqual(parsed.get_content_text(), expected_body)
            self.assertEqual(parsed.defects, [])
Пример #6
0
 def test_parse_raw_mime_8bit_utf8(self):
     # In some cases, the message below ends up with 'Content-Transfer-Encoding: 8bit',
     # so it needs to be parsed as bytes, not text (see https://bugs.python.org/issue18271).
     # Message.as_string() returns str (text), not bytes.
     # (This might be a Django bug; plain old MIMEText avoids the problem by using
     # 'Content-Transfer-Encoding: base64', which parses fine as text or bytes.)
     # Either way, AnymailInboundMessage should try to sidestep the whole issue.
     mime_message = SafeMIMEText("Unicode ✓", "plain", "utf-8")
     parsed = AnymailInboundMessage.parse_raw_mime(mime_message.as_string())
     self.assertEqual(parsed.text, "Unicode ✓")  # *not* "Unicode \\u2713"
Пример #7
0
    def test_parse_raw_mime(self):
        # Smoke test only: this is not an exhaustive check of email.parser MIME
        # handling, just that AnymailInboundMessage.parse_raw_mime calls it correctly.
        raw_mime = dedent("""\
            Content-Type: text/plain
            Subject: This is a test message

            This is a test body.
            """)
        parsed = AnymailInboundMessage.parse_raw_mime(raw_mime)

        subject = parsed['Subject']
        self.assertEqual(subject, "This is a test message")
        body = parsed.get_content_text()
        self.assertEqual(body, "This is a test body.\n")
        self.assertEqual(parsed.defects, [])
Пример #8
0
    def test_parse_encoded_headers(self):
        # RFC2047 header encoding: encoded-words in structured (From/To/Cc) and
        # unstructured (Subject/X-Broken) headers must come back decoded.
        #
        # Addresses use reserved example.com domains. The Cc address has a punycode
        # domain so the test can confirm the domain is *not* IDNA-decoded.
        raw = dedent("""\
            Content-Type: text/plain
            From: =?US-ASCII?Q?Keith_Moore?= <moore@example.com>
            To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@example.com>,
             =?ISO-8859-1?Q?Andr=E9?= "Pirard, Jr." <PIRARD@example.com>
            Cc: =?utf-8?b?TmfGsOG7nWkgbmjhuq1u?= <cc@xn--th-e0a.example.com>
            Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
             =?utf-8?q?u_understand_the_example=E2=9C=93?=
            X-Broken: =?utf-8?q?Not_a_char:_=88.?=

            Some examples adapted from http://dogmamix.com/MimeHeadersDecoder/
            """)
        msg = AnymailInboundMessage.parse_raw_mime(raw)

        self.assertEqual(msg["From"], "Keith Moore <moore@example.com>")
        self.assertEqual(msg.from_email.display_name, "Keith Moore")
        self.assertEqual(msg.from_email.addr_spec, "moore@example.com")

        # When an RFC2047 encoded-word abuts an RFC5322 quoted-word in a *structured* header,
        # Python 3's parser nicely recombines them into a single quoted word. That's way too
        # complicated for our Python 2 workaround ...
        self.assertIn(
            msg["To"],
            [  # `To` header will decode to one of these:
                'Keld Jørn Simonsen <keld@example.com>, "André Pirard, Jr." <PIRARD@example.com>',  # Python 3
                'Keld Jørn Simonsen <keld@example.com>, André "Pirard, Jr." <PIRARD@example.com>',  # workaround version
            ])
        # ... but the two forms are equivalent, and de-structure the same:
        self.assertEqual(msg.to[0].display_name, "Keld Jørn Simonsen")
        self.assertEqual(
            msg.to[1].display_name,
            "André Pirard, Jr.")  # correct in Python 3 *and* workaround!

        # Note: Like email.headerregistry.Address, Anymail decodes an RFC2047-encoded display_name,
        # but does not decode a punycode domain. (Use `idna.decode(domain)` if you need that.)
        self.assertEqual(msg["Cc"], "Người nhận <cc@xn--th-e0a.example.com>")
        self.assertEqual(msg.cc[0].display_name, "Người nhận")
        self.assertEqual(msg.cc[0].addr_spec, "cc@xn--th-e0a.example.com")
        self.assertEqual(msg.cc[0].domain, "xn--th-e0a.example.com")

        # Subject breaks between 'o' and 'u' in the word "you", must be re-joined without space.
        # Also tests joining encoded words with different charsets:
        self.assertEqual(
            msg["Subject"],
            "If you can read this you understand the example\N{CHECK MARK}")

        # Replace illegal encodings (rather than causing error):
        self.assertEqual(msg["X-Broken"],
                         "Not a char: \N{REPLACEMENT CHARACTER}.")
Пример #9
0
    def test_parse_rfc822_attachment_from_raw_mime(self):
        # A message/rfc822 attachment should be parsed recursively, so the attached
        # message and its own parts are AnymailInboundMessage objects as well.
        # NOTE(review): the From/To values in the bounce wrapper below look like
        # scrubbed placeholders rather than addresses; no assertion here reads
        # them -- confirm against the upstream fixture.
        bounce_template = dedent("""\
            MIME-Version: 1.0
            From: [email protected]
            Subject: Undeliverable
            To: [email protected]
            Content-Type: multipart/mixed; boundary="boundary-bounce"

            --boundary-bounce
            Content-Type: text/plain

            Your message was undeliverable due to carrier pigeon strike.
            The original message is attached.

            --boundary-bounce
            Content-Type: message/rfc822
            Content-Disposition: attachment

            {original_raw_message}
            --boundary-bounce--
            """)
        raw_mime = bounce_template.format(original_raw_message=self.original_raw_message)

        bounce = AnymailInboundMessage.parse_raw_mime(raw_mime)
        self.assertIsInstance(bounce, AnymailInboundMessage)

        # Second part of the bounce is the message/rfc822 wrapper
        wrapper = bounce.get_payload(1)
        self.assertIsInstance(wrapper, AnymailInboundMessage)
        self.assertEqual(wrapper.get_content_type(), "message/rfc822")
        self.assertTrue(wrapper.is_attachment())

        # Its single payload is the original message, itself fully parsed
        original = wrapper.get_payload(0)
        self.assertIsInstance(original, AnymailInboundMessage)
        self.assertEqual(original['Subject'], "Original message")
        self.assertEqual(original.get_content_type(), "multipart/related")
        self.assertEqual(wrapper.get_content_text(), self.original_raw_message)

        # The parts inside the original message are reachable too
        inline_image = original.get_payload(1)
        self.assertEqual(inline_image.get_content_type(), "image/png")
        self.assertTrue(inline_image.is_inline_attachment())
        image_bytes = inline_image.get_payload(decode=True)
        self.assertEqual(image_bytes, SAMPLE_IMAGE_CONTENT)
Пример #10
0
    def test_parse_encoded_headers(self):
        # RFC2047 header encoding: encoded-words in structured (From/To/Cc) and
        # unstructured (Subject/X-Broken) headers must come back decoded.
        #
        # Addresses use reserved example.com domains. The Cc address has a punycode
        # domain so the test can confirm the domain is *not* IDNA-decoded.
        raw = dedent("""\
            Content-Type: text/plain
            From: =?US-ASCII?Q?Keith_Moore?= <moore@example.com>
            To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@example.com>,
             =?ISO-8859-1?Q?Andr=E9?= "Pirard, Jr." <PIRARD@example.com>
            Cc: =?utf-8?b?TmfGsOG7nWkgbmjhuq1u?= <cc@xn--th-e0a.example.com>
            Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
             =?utf-8?q?u_understand_the_example=E2=9C=93?=
            X-Broken: =?utf-8?q?Not_a_char:_=88.?=

            Some examples adapted from http://dogmamix.com/MimeHeadersDecoder/
            """)
        msg = AnymailInboundMessage.parse_raw_mime(raw)

        self.assertEqual(msg["From"], "Keith Moore <moore@example.com>")
        self.assertEqual(msg.from_email.display_name, "Keith Moore")
        self.assertEqual(msg.from_email.addr_spec, "moore@example.com")

        # The encoded-word "André" abuts the quoted-word "Pirard, Jr." in the raw
        # header; the decoded header is expected to show them as one quoted display name.
        self.assertEqual(
            msg["To"], 'Keld Jørn Simonsen <keld@example.com>, '
            '"André Pirard, Jr." <PIRARD@example.com>')
        self.assertEqual(msg.to[0].display_name, "Keld Jørn Simonsen")
        self.assertEqual(msg.to[1].display_name, "André Pirard, Jr.")

        # Note: Like email.headerregistry.Address, Anymail decodes an RFC2047-encoded display_name,
        # but does not decode a punycode domain. (Use `idna.decode(domain)` if you need that.)
        self.assertEqual(msg["Cc"], "Người nhận <cc@xn--th-e0a.example.com>")
        self.assertEqual(msg.cc[0].display_name, "Người nhận")
        self.assertEqual(msg.cc[0].addr_spec, "cc@xn--th-e0a.example.com")
        self.assertEqual(msg.cc[0].domain, "xn--th-e0a.example.com")

        # Subject breaks between 'o' and 'u' in the word "you", must be re-joined without space.
        # Also tests joining encoded words with different charsets:
        self.assertEqual(
            msg["Subject"],
            "If you can read this you understand the example\N{CHECK MARK}")

        # Replace illegal encodings (rather than causing error):
        self.assertEqual(msg["X-Broken"],
                         "Not a char: \N{REPLACEMENT CHARACTER}.")
Пример #11
0
    def test_attachment_as_uploaded_file(self):
        # A base64 PNG attachment, converted with as_uploaded_file(), should expose
        # the declared filename, the part's content type, and the decoded bytes.
        raw_mime = dedent("""\
            MIME-Version: 1.0
            Subject: Attachment test
            Content-Type: multipart/mixed; boundary="this_is_a_boundary"

            --this_is_a_boundary
            Content-Type: text/plain; charset="UTF-8"

            The test sample image is attached below.

            --this_is_a_boundary
            Content-Type: image/png; name="sample_image.png"
            Content-Disposition: attachment; filename="sample_image.png"
            Content-Transfer-Encoding: base64

            iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz
            AAALEgAACxIB0t1+/AAAABR0RVh0Q3JlYXRpb24gVGltZQAzLzEvMTNoZNRjAAAAHHRFWHRTb2Z0
            d2FyZQBBZG9iZSBGaXJld29ya3MgQ1M1cbXjNgAAAZ1JREFUWIXtl7FKA0EQhr+TgIFgo5BXyBUp
            fIGksLawUNAXWFFfwCJgBAtfIJFMLXgQn8BSwdpCiPcKAdOIoI2x2Dmyd7kYwXhp9odluX/uZv6d
            nZu7DXowxiKZi0IAUHKCvxcsoAIEpST4IawVGb0Hb0BlpcigefACvAAvwAsoTTGGlwwzBAyivLUP
            EZrOM10AhGOH2wWugVVlHoAdhJHrPC8DNR0JGsAAQ9mxNzBOMNjS4Qrq69U5EKmf12ywWVsQI4QI
            IbCn3Gnmnk7uk1bokfooI7QRDlQIGCdzPwiYh0idtXNs2zq3UqwVEiDcu/R0DVjUnFpItuPSscfA
            FXCGSfEAdZ2fVeQ68OjYWwi3ycVvMhABGwgfKXZScHeZ+4c6VzN8FbuYukvOykCs+z8PJ0xqIXYE
            d4ALoKlVH2IIgUHWwd/6gNAFPjPcCPvKNTDcYAj1lXzKc7GIRrSZI6yJzcQ+dtV9bD+IkHThBj34
            4j9/yYxupaQbXPJLNqsGFgeZ6qwpLP1b4AV4AV5AoKfjpR5OwR6VKwULCAC+AQV4W9Ps4uZQAAAA
            AElFTkSuQmCC
            --this_is_a_boundary--
            """)

        parsed = AnymailInboundMessage.parse_raw_mime(raw_mime)
        uploaded = parsed.attachments[0].as_uploaded_file()

        self.assertEqual(uploaded.name, "sample_image.png")
        self.assertEqual(uploaded.content_type, "image/png")
        image_bytes = uploaded.read()
        self.assertEqual(image_bytes, SAMPLE_IMAGE_CONTENT)