def test_invalid_charset(self):
    """Check a requested charset that cannot decode the response body.

    The response carries ``LATIN1_BYTES`` while ``utf16`` is requested.
    Both ``http._decide_encoding`` and ``http.error_handling_callback``
    are expected to raise ``UnicodeDecodeError``; afterwards
    ``resp.text`` is compared against replacement-mode decodes of the
    raw content.
    """
    # NOTE(review): a second method with this exact name appears later in
    # the visible source; if both sit in the same class body the later
    # definition replaces this one -- confirm which is intended.
    requested = 'utf16'
    response = CharsetTestCase._create_response(
        data=CharsetTestCase.LATIN1_BYTES)

    # Ignore WARNING: Encoding "utf16" requested but "utf-8" received
    with patch('pywikibot.warning'), \
            self.assertRaisesRegex(UnicodeDecodeError,
                                   self.CODEC_CANT_DECODE_RE):
        http._decide_encoding(response, requested)
    self.assertEqual(response.content, CharsetTestCase.LATIN1_BYTES)

    # Store either the decided encoding or the raised exception object
    # on the response, so the error handling callback can be exercised.
    try:
        decided = http._decide_encoding(response, requested)
    except UnicodeDecodeError as exc:
        decided = exc
    response.encoding = decided

    with patch('pywikibot.error'), \
            self.assertRaisesRegex(UnicodeDecodeError,
                                   self.CODEC_CANT_DECODE_RE):
        http.error_handling_callback(response)

    # TODO: this is a breaking change
    # self.assertRaisesRegex(
    #     UnicodeDecodeError, self.CODEC_CANT_DECODE_RE, lambda: resp.text)

    # What Response() would do with encoding = UnicodeDecodeError:
    # str(self.content, errors='replace')
    fallback_text = str(response.content, errors='replace')
    self.assertEqual(response.text, fallback_text)

    # What Response() would do with encoding = None:
    # str(resp.content, resp.encoding, errors='replace')
    response.encoding = None
    self.assertEqual(
        response.text,
        str(response.content, response.apparent_encoding,
            errors='replace'))
def test_same_charset(self):
    """Check decoding when ``utf-8`` is requested for the default response.

    The decided encoding must be ``utf-8``; the raw content must equal
    ``UTF8_BYTES`` and the decoded text must equal ``STR``.
    """
    response = CharsetTestCase._create_response()
    response.encoding = http._decide_encoding(response, 'utf-8')

    self.assertEqual('utf-8', response.encoding)
    self.assertEqual(response.content, CharsetTestCase.UTF8_BYTES)
    self.assertEqual(response.text, CharsetTestCase.STR)
def test_content_type_xml_with_variant_charset(self):
    """Check that a latin1 XML declaration in the body sets the encoding.

    No charset is requested; the content type is ``application/xml``.
    """
    xml_declaration = "<?xml version='1.0' encoding='latin1'?>"
    response = CharsetTestCase._create_response(
        headers={'content-type': 'application/xml'},
        data=xml_declaration.encode('latin1'))

    response.encoding = http._decide_encoding(response, None)
    self.assertEqual('latin1', response.encoding)
def test_content_type_xml_with_charset(self):
    """Check that a UTF-8 XML declaration in the body sets the encoding.

    No charset is requested; the content type is ``application/xml``.
    The expected encoding keeps the upper-case spelling ``UTF-8``.
    """
    xml_declaration = '<?xml version="1.0" encoding="UTF-8"?>'
    response = CharsetTestCase._create_response(
        headers={'content-type': 'application/xml'},
        data=xml_declaration.encode('utf-8'))

    response.encoding = http._decide_encoding(response, None)
    self.assertEqual('UTF-8', response.encoding)
def test_content_type_xml_without_charset(self):
    """Check XML content when neither a charset nor a declaration is given.

    The body is ``UTF8_BYTES`` served as ``application/xml``; the
    decided encoding is expected to be ``utf-8``.
    """
    xml_headers = {'content-type': 'application/xml'}
    response = CharsetTestCase._create_response(
        headers=xml_headers, data=CharsetTestCase.UTF8_BYTES)

    response.encoding = http._decide_encoding(response, None)
    self.assertEqual('utf-8', response.encoding)
def test_no_charset(self):
    """Check decoding when no charset is requested or sent in the header.

    The response has an empty ``content-type`` header and a body of
    ``LATIN1_BYTES``; the decided encoding is expected to be ``latin1``
    and the decoded text to equal ``STR``.
    """
    empty_content_type = {'content-type': ''}
    response = CharsetTestCase._create_response(
        headers=empty_content_type, data=CharsetTestCase.LATIN1_BYTES)

    response.encoding = http._decide_encoding(response, None)

    self.assertEqual('latin1', response.encoding)
    self.assertEqual(response.content, CharsetTestCase.LATIN1_BYTES)
    self.assertEqual(response.text, CharsetTestCase.STR)
def test_header_charset(self):
    """Test decoding with different charsets and valid header charset.

    ``latin1`` is requested for the default test response; the test
    expects ``http._decide_encoding`` to settle on ``utf-8`` and the
    decoded text to equal ``CharsetTestCase.STR``.
    """
    charset = 'latin1'
    resp = CharsetTestCase._create_response()

    # Ignore WARNING: Encoding "latin1" requested but "utf-8" received.
    # The patch must be active while the encoding is decided, since that
    # call is presumably what emits the warning; the assertions below do
    # not need it.
    with patch('pywikibot.warning'):
        resp.encoding = http._decide_encoding(resp, charset)

    self.assertEqual('utf-8', resp.encoding)
    self.assertEqual(resp.content, CharsetTestCase.UTF8_BYTES)
    self.assertEqual(resp.text, CharsetTestCase.STR)
def test_invalid_charset(self):
    """Check requested charsets that do not fit the response body.

    For each charset a response with ``LATIN1_BYTES`` is created. The
    decided encoding is expected to be ``None`` and ``resp.text`` is
    expected to match a decode with the response's apparent encoding.
    """
    for requested in ('utf16', 'win-1251'):
        with self.subTest(charset=requested):
            response = CharsetTestCase._create_response(
                data=CharsetTestCase.LATIN1_BYTES)

            # Ignore WARNING:
            with patch('pywikibot.warning'):
                decided = http._decide_encoding(response, requested)
                response.encoding = decided

            self.assertIsNone(response.encoding)
            self.assertIsNotNone(response.apparent_encoding)
            self.assertEqual(response.content,
                             CharsetTestCase.LATIN1_BYTES)

            # test Response.apparent_encoding
            expected_text = str(response.content,
                                response.apparent_encoding,
                                errors='replace')
            self.assertEqual(response.text, expected_text)