class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'hz' codec, driven by the inherited TestBase."""

    encoding = 'hz'
    tstring = multibytecodec_support.load_teststring('hz')

    # Each entry is (input, error-handler name, expected result).
    # NOTE(review): an expected result of None presumably means the
    # conversion must fail under that handler, and a str input presumably
    # selects encoding rather than decoding -- confirm against TestBase.
    codectests = (
        # test '~\n' (3 lines)
        (
            b'This sentence is in ASCII.\n'
            b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
            b'~{NpJ)l6HK!#~}Bye.\n',
            'strict',
            'This sentence is in ASCII.\n'
            'The next sentence is in GB.'
            '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
            'Bye.\n',
        ),

        # test '~\n' (4 lines)
        (
            b'This sentence is in ASCII.\n'
            b'The next sentence is in GB.~\n'
            b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
            b'Bye.\n',
            'strict',
            'This sentence is in ASCII.\n'
            'The next sentence is in GB.'
            '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
            'Bye.\n',
        ),

        # invalid bytes
        (b'ab~cd', 'replace', 'ab\uFFFDcd'),
        (b'ab\xffcd', 'replace', 'ab\uFFFDcd'),
        (b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'),
        (b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'),
        (b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"),

        # issue 30003
        ('ab~cd', 'strict', b'ab~~cd'),  # escape ~
        (b'~{Dc~~:C~}', 'strict', None),  # ~~ only in ASCII mode
        (b'~{Dc~\n:C~}', 'strict', None),  # ~\n only in ASCII mode
    )
class Test_EUCKR(multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'euc_kr' codec, driven by the inherited TestBase."""

    encoding = 'euc_kr'
    tstring = multibytecodec_support.load_teststring('euc_kr')

    # Each entry is (input bytes, error-handler name, expected text).
    # NOTE(review): an expected value of None presumably means decoding
    # must fail under that handler -- confirm against TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", 'abc\ufffd\ufffd\uc894'),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\uc894\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),

        # composed make-up sequence errors: every proper prefix of the
        # 8-byte sequence below is listed with None; only the complete
        # sequence has a decoded result.
        (b"\xa4\xd4", "strict", None),
        (b"\xa4\xd4\xa4", "strict", None),
        (b"\xa4\xd4\xa4\xb6", "strict", None),
        (b"\xa4\xd4\xa4\xb6\xa4", "strict", None),
        (b"\xa4\xd4\xa4\xb6\xa4\xd0", "strict", None),
        (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4", "strict", None),
        (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4", "strict", "\uc4d4"),
        (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4x", "strict", "\uc4d4x"),
        (b"a\xa4\xd4\xa4\xb6\xa4", "replace", 'a\ufffd'),
        (b"\xa4\xd4\xa3\xb6\xa4\xd0\xa4\xd4", "strict", None),
        (b"\xa4\xd4\xa4\xb6\xa3\xd0\xa4\xd4", "strict", None),
        (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa3\xd4", "strict", None),
        (
            b"\xa4\xd4\xa4\xff\xa4\xd0\xa4\xd4",
            "replace",
            '\ufffd\u6e21\ufffd\u3160\ufffd',
        ),
        (
            b"\xa4\xd4\xa4\xb6\xa4\xff\xa4\xd4",
            "replace",
            '\ufffd\u6e21\ub544\ufffd\ufffd',
        ),
        (
            b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xff",
            "replace",
            '\ufffd\u6e21\ub544\u572d\ufffd',
        ),
        (
            b"\xa4\xd4\xff\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4",
            "replace",
            '\ufffd\ufffd\ufffd\uc4d4',
        ),
        (b"\xc1\xc4", "strict", "\uc894"),
    )
class Test_EUC_JP_COMPAT(multibytecodec_support.TestBase,
                         unittest.TestCase):
    """Test data for the 'euc_jp' codec, driven by the inherited TestBase."""

    encoding = 'euc_jp'
    tstring = multibytecodec_support.load_teststring('euc_jp')

    # The shared euc_commontests cases plus two str -> bytes cases:
    # U+00A5 is expected to become byte 0x5c and U+203E byte 0x7e.
    codectests = euc_commontests + (
        ("\xa5", "strict", b"\x5c"),
        ("\u203e", "strict", b"\x7e"),
    )
class Test_Big5(multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'big5' codec, driven by the inherited TestBase."""

    encoding = 'big5'
    tstring = multibytecodec_support.load_teststring('big5')

    # Each entry is (input bytes, error-handler name, expected text).
    # NOTE(review): an expected value of None presumably means decoding
    # must fail under that handler -- confirm against TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u8b10"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u8b10\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
    )
class Test_CP949(multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'cp949' codec, driven by the inherited TestBase."""

    encoding = 'cp949'
    tstring = multibytecodec_support.load_teststring('cp949')

    # Each entry is (input bytes, error-handler name, expected text).
    # NOTE(review): an expected value of None presumably means decoding
    # must fail under that handler -- confirm against TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\uc894"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\uc894\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),
    )
class Test_ISO2022_KR(multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'iso2022_kr' codec, driven by the inherited TestBase."""

    encoding = 'iso2022_kr'
    tstring = multibytecodec_support.load_teststring('iso2022_kr')

    # The shared COMMON_CODEC_TESTS cases plus one case where the
    # ESC 'N' bytes are expected to pass through unchanged under 'replace'.
    codectests = COMMON_CODEC_TESTS + (
        (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'),
    )

    # iso2022_kr.txt cannot be used to test "chunk coding": the escape
    # sequence is only written on the first line
    @unittest.skip('iso2022_kr.txt cannot be used to test "chunk coding"')
    def test_chunkcoding(self):
        pass
class Test_SJIS_COMPAT(multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'shift_jis' codec, driven by the inherited TestBase."""

    encoding = 'shift_jis'
    tstring = multibytecodec_support.load_teststring('shift_jis')

    # The shared shiftjis_commonenctests cases plus codec-specific
    # (input bytes, error-handler name, expected text) entries.
    codectests = shiftjis_commonenctests + (
        (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
        (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),

        # Backslash and tilde bytes are expected to decode to themselves.
        (b"\\\x7e", "strict", "\\\x7e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),

        # An undecodable lead byte is replaced; the following ASCII byte
        # is still expected in the output.
        (b"abc\x81\x39", "replace", "abc\ufffd9"),
        (b"abc\xEA\xFC", "replace", "abc\ufffd\ufffd"),
        (b"abc\xFF\x58", "replace", "abc\ufffdX"),
    )
class Test_GB2312(multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'gb2312' codec, driven by the inherited TestBase."""

    encoding = 'gb2312'
    tstring = multibytecodec_support.load_teststring('gb2312')

    # Each entry is (input bytes, error-handler name, expected text).
    # NOTE(review): an expected value of None presumably means decoding
    # must fail under that handler -- confirm against TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x81\x81\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x81\x81\xc1\xc4", "ignore", "abc\u804a"),
        (b"\xc1\x64", "strict", None),
    )
class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'shift_jisx0213' codec, driven by the inherited TestBase."""

    encoding = 'shift_jisx0213'
    tstring = multibytecodec_support.load_teststring('shift_jisx0213')

    # The shared shiftjis_commonenctests cases plus codec-specific
    # (input bytes, error-handler name, expected text) entries.
    codectests = shiftjis_commonenctests + (
        (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
        (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),

        # sjis vs cp932
        (b"\\\x7e", "replace", "\xa5\u203e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),
    )

    # (text to encode, expected bytes).  The expected bytes spell out
    # U+211C, U+2329, U+1234 and U+232A as ASCII character references
    # (&real; &lang; &#4660; &rang; -- 0x1234 == 4660).  They must stay
    # ASCII: a bytes literal containing the characters themselves is a
    # SyntaxError.
    # NOTE(review): presumably consumed by an xmlcharrefreplace-style
    # error-handler test in TestBase -- confirm there.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
    )
class Test_GBK(multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'gbk' codec, driven by the inherited TestBase."""

    encoding = 'gbk'
    tstring = multibytecodec_support.load_teststring('gbk')

    # Each entry is (input, error-handler name, expected result).
    # NOTE(review): an expected result of None presumably means the
    # conversion must fail under that handler, and a str input presumably
    # selects encoding rather than decoding -- confirm against TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
        (b"\x83\x34\x83\x31", "strict", None),
        ("\u30fb", "strict", None),
    )
class Test_JOHAB(multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'johab' codec, driven by the inherited TestBase."""

    encoding = 'johab'
    tstring = multibytecodec_support.load_teststring('johab')

    # Each entry is (input bytes, error-handler name, expected text).
    # NOTE(review): an expected value of None presumably means decoding
    # must fail under that handler -- confirm against TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\ucd27"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\ucd27\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\ucd27"),

        # A bad lead byte is replaced on its own; the ASCII bytes after it
        # are still expected in the output.
        (b"\xD8abc", "replace", "\uFFFDabc"),
        (b"\xD8\xFFabc", "replace", "\uFFFD\uFFFDabc"),
        (b"\x84bxy", "replace", "\uFFFDbxy"),
        (b"\x8CBxy", "replace", "\uFFFDBxy"),
    )
class Test_SJIS_2004(multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'shift_jis_2004' codec, driven by the inherited TestBase."""

    encoding = 'shift_jis_2004'
    # Uses the 'shift_jis' test string, not a codec-specific one.
    tstring = multibytecodec_support.load_teststring('shift_jis')

    # The shared shiftjis_commonenctests cases plus codec-specific
    # (input bytes, error-handler name, expected text) entries.
    codectests = shiftjis_commonenctests + (
        (b"\\\x7e", "strict", "\xa5\u203e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\\\u2016\u2212"),
        (b"abc\xEA\xFC", "strict", "abc\u64bf"),
        (b"\x81\x39xy", "replace", "\ufffd9xy"),
        (b"\xFF\x58xy", "replace", "\ufffdXxy"),
        (b"\x80\x80\x82\x84xy", "replace", "\ufffd\ufffd\uff44xy"),
        (b"\x80\x80\x82\x84\x88xy", "replace", "\ufffd\ufffd\uff44\u5864y"),
        (b"\xFC\xFBxy", "replace", '\ufffd\u95b4y'),
    )

    # (text to encode, expected bytes).  The expected bytes spell out
    # U+211C, U+2329, U+1234 and U+232A as ASCII character references
    # (&real; &lang; &#4660; &rang; -- 0x1234 == 4660).  They must stay
    # ASCII: a bytes literal containing the characters themselves is a
    # SyntaxError.
    # NOTE(review): presumably consumed by an xmlcharrefreplace-style
    # error-handler test in TestBase -- confirm there.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
    )
class Test_CP932(multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'cp932' codec, driven by the inherited TestBase."""

    encoding = 'cp932'
    # Uses the 'shift_jis' test string, not a codec-specific one.
    tstring = multibytecodec_support.load_teststring('shift_jis')

    # Each entry is (input bytes, error-handler name, expected text).
    # NOTE(review): an expected value of None presumably means decoding
    # must fail under that handler -- confirm against TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x81\x00\x81\x00\x82\x84", "strict", None),
        (b"abc\xf8", "strict", None),
        (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\x00\uff44"),
        (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\x00\uff44\ufffd"),
        (b"abc\x81\x00\x82\x84", "ignore", "abc\x00\uff44"),
        (b"ab\xEBxy", "replace", "ab\uFFFDxy"),
        (b"ab\xF0\x39xy", "replace", "ab\uFFFD9xy"),
        (b"ab\xEA\xF0xy", "replace", 'ab\ufffd\ue038y'),

        # sjis vs cp932
        (b"\\\x7e", "replace", "\\\x7e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"),
    )
class Test_GB18030(multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'gb18030' codec, driven by the inherited TestBase."""

    encoding = 'gb18030'
    tstring = multibytecodec_support.load_teststring('gb18030')

    # Each entry is (input, error-handler name, expected result).
    # NOTE(review): an expected result of None presumably means the
    # conversion must fail under that handler, and a str input presumably
    # selects encoding rather than decoding -- confirm against TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
        (
            b"abc\x84\x39\x84\x39\xc1\xc4",
            "replace",
            "abc\ufffd9\ufffd9\u804a",
        ),
        ("\u30fb", "strict", b"\x819\xa79"),
        (b"abc\x84\x32\x80\x80def", "replace", 'abc\ufffd2\ufffd\ufffddef'),
        (b"abc\x81\x30\x81\x30def", "strict", 'abc\x80def'),
        (b"abc\x86\x30\x81\x30def", "replace", 'abc\ufffd0\ufffd0def'),

        # issue29990
        (b"\xff\x30\x81\x30", "strict", None),
        (b"\x81\x30\xff\x30", "strict", None),
        (
            b"abc\x81\x39\xff\x39\xc1\xc4",
            "replace",
            "abc\ufffd\x39\ufffd\x39\u804a",
        ),
        (b"abc\xab\x36\xff\x30def", "replace", 'abc\ufffd\x36\ufffd\x30def'),
        (b"abc\xbf\x38\xff\x32\xc1\xc4", "ignore", "abc\x38\x32\u804a"),
    )

    # NOTE(review): presumably tells TestBase that this codec maps the
    # whole ISO 10646 repertoire -- confirm how TestBase reads this flag.
    has_iso10646 = True
class Test_ISO2022_JP2(multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'iso2022_jp_2' codec, driven by the inherited TestBase."""

    encoding = 'iso2022_jp_2'
    # Uses the 'iso2022_jp' test string, not a codec-specific one.
    tstring = multibytecodec_support.load_teststring('iso2022_jp')

    # The shared COMMON_CODEC_TESTS cases plus one case where the
    # ESC 'N' bytes are expected to vanish from the output under 'replace'.
    codectests = COMMON_CODEC_TESTS + (
        (b'ab\x1BNdef', 'replace', 'abdef'),
    )
class Test_EUC_JISX0213(multibytecodec_support.TestBase,
                        unittest.TestCase):
    """Test data for the 'euc_jisx0213' codec, driven by the inherited TestBase."""

    encoding = 'euc_jisx0213'
    tstring = multibytecodec_support.load_teststring('euc_jisx0213')

    # Only the shared euc_commontests cases; no codec-specific additions.
    codectests = euc_commontests

    # (text to encode, expected bytes).  The expected bytes spell out
    # U+211C, U+2329, U+1234 and U+232A as ASCII character references
    # (&real; &lang; &#4660; &rang; -- 0x1234 == 4660).  They must stay
    # ASCII: a bytes literal containing the characters themselves is a
    # SyntaxError.
    # NOTE(review): presumably consumed by an xmlcharrefreplace-style
    # error-handler test in TestBase -- confirm there.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
    )