Exemplo n.º 1
0
    def test_invalid_charset(self):
        """Check a latin1 body against a requested 'utf16' charset.

        Deciding the encoding must raise UnicodeDecodeError and leave the
        raw content untouched.  The error is then stored as the response
        encoding to verify that ``http.error_handling_callback`` raises it
        and how ``resp.text`` falls back afterwards.
        """
        requested = 'utf16'
        response = CharsetTestCase._create_response(
            data=CharsetTestCase.LATIN1_BYTES)

        # Silence WARNING: Encoding "utf16" requested but "utf-8" received
        with patch('pywikibot.warning'), self.assertRaisesRegex(
                UnicodeDecodeError, self.CODEC_CANT_DECODE_RE):
            http._decide_encoding(response, requested)
        self.assertEqual(response.content, CharsetTestCase.LATIN1_BYTES)

        # Store either the decided encoding or the decoding error itself.
        try:
            decided = http._decide_encoding(response, requested)
        except UnicodeDecodeError as exc:
            decided = exc
        response.encoding = decided

        with patch('pywikibot.error'), self.assertRaisesRegex(
                UnicodeDecodeError, self.CODEC_CANT_DECODE_RE):
            http.error_handling_callback(response)

        # TODO: this is a breaking change -- the check that reading
        # resp.text raises UnicodeDecodeError (matching
        # CODEC_CANT_DECODE_RE) is currently disabled.

        # Response() would do:
        # encoding = UnicodeDecodeError -> str(self.content, errors='replace')
        decoded = response.text
        expected = str(response.content, errors='replace')
        self.assertEqual(decoded, expected)

        # encoding = None -> str(resp.content, resp.encoding, errors='replace')
        response.encoding = None
        decoded = response.text
        expected = str(
            response.content, response.apparent_encoding, errors='replace')
        self.assertEqual(decoded, expected)
Exemplo n.º 2
0
 def test_same_charset(self):
     """Decide the encoding when 'utf-8' is requested explicitly.

     The decided encoding must be 'utf-8'; the content must equal
     ``UTF8_BYTES`` and the decoded text must equal ``STR``.
     """
     response = CharsetTestCase._create_response()
     response.encoding = http._decide_encoding(response, 'utf-8')

     self.assertEqual('utf-8', response.encoding)
     self.assertEqual(response.content, CharsetTestCase.UTF8_BYTES)
     self.assertEqual(response.text, CharsetTestCase.STR)
Exemplo n.º 3
0
 def test_content_type_xml_with_variant_charset(self):
     """Expect 'latin1' for xml content declaring latin1 in its prolog.

     No charset is requested; the content-type header is
     'application/xml' and carries no charset of its own.
     """
     xml_prolog = "<?xml version='1.0' encoding='latin1'?>".encode('latin1')
     response = CharsetTestCase._create_response(
         headers={'content-type': 'application/xml'},
         data=xml_prolog)

     response.encoding = http._decide_encoding(response, None)
     self.assertEqual('latin1', response.encoding)
Exemplo n.º 4
0
 def test_content_type_xml_with_charset(self):
     """Expect 'UTF-8' for xml content declaring UTF-8 in its prolog.

     No charset is requested; the content-type header is
     'application/xml' and carries no charset of its own.
     """
     xml_prolog = '<?xml version="1.0" encoding="UTF-8"?>'.encode('utf-8')
     response = CharsetTestCase._create_response(
         headers={'content-type': 'application/xml'},
         data=xml_prolog)

     response.encoding = http._decide_encoding(response, None)
     self.assertEqual('UTF-8', response.encoding)
Exemplo n.º 5
0
 def test_content_type_xml_without_charset(self):
     """Expect 'utf-8' for xml content that declares no encoding.

     No charset is requested; the response has an 'application/xml'
     content-type header and ``UTF8_BYTES`` as its body.
     """
     xml_headers = {'content-type': 'application/xml'}
     response = CharsetTestCase._create_response(
         headers=xml_headers, data=CharsetTestCase.UTF8_BYTES)

     response.encoding = http._decide_encoding(response, None)
     self.assertEqual('utf-8', response.encoding)
Exemplo n.º 6
0
 def test_no_charset(self):
     """Expect 'latin1' when neither request nor header name a charset.

     The response has an empty content-type header and ``LATIN1_BYTES``
     as its body; the decoded text must equal ``STR``.
     """
     empty_content_type = {'content-type': ''}
     response = CharsetTestCase._create_response(
         headers=empty_content_type, data=CharsetTestCase.LATIN1_BYTES)

     response.encoding = http._decide_encoding(response, None)

     self.assertEqual('latin1', response.encoding)
     self.assertEqual(response.content, CharsetTestCase.LATIN1_BYTES)
     self.assertEqual(response.text, CharsetTestCase.STR)
Exemplo n.º 7
0
 def test_header_charset(self):
     """Test decoding with different charsets and valid header charset.

     'latin1' is requested for a default response.  The decided encoding
     must be 'utf-8'; the content must equal ``UTF8_BYTES`` and the
     decoded text must equal ``STR``.
     """
     charset = 'latin1'
     resp = CharsetTestCase._create_response()
     # Ignore WARNING: Encoding "latin1" requested but "utf-8" received.
     # The warning is expected to come from deciding the encoding, so that
     # call (not the later assertion) has to run under the patch.
     with patch('pywikibot.warning'):
         resp.encoding = http._decide_encoding(resp, charset)
     self.assertEqual('utf-8', resp.encoding)
     self.assertEqual(resp.content, CharsetTestCase.UTF8_BYTES)
     self.assertEqual(resp.text, CharsetTestCase.STR)
Exemplo n.º 8
0
    def test_invalid_charset(self):
        """Check latin1 bodies against requested charsets that do not fit.

        For each of 'utf16' and 'win-1251' the decided encoding must be
        None, so that ``resp.text`` is decoded with
        ``resp.apparent_encoding``.
        """
        for requested in ('utf16', 'win-1251'):
            with self.subTest(charset=requested):
                response = CharsetTestCase._create_response(
                    data=CharsetTestCase.LATIN1_BYTES)

                # Silence the WARNING logged while deciding the encoding.
                with patch('pywikibot.warning'):
                    decided = http._decide_encoding(response, requested)
                    response.encoding = decided

                self.assertIsNone(response.encoding)
                self.assertIsNotNone(response.apparent_encoding)
                self.assertEqual(response.content,
                                 CharsetTestCase.LATIN1_BYTES)

                # test Response.apparent_encoding
                decoded = response.text
                expected = str(response.content,
                               response.apparent_encoding,
                               errors='replace')
                self.assertEqual(decoded, expected)