示例#1
0
class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'euc_kr'
    tstring = test_multibytecodec_support.load_teststring('euc_kr')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\uc894"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\uc894\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\uc894"),

        # composed make-up sequence errors
        ("\xa4\xd4", "strict", None),
        ("\xa4\xd4\xa4", "strict", None),
        ("\xa4\xd4\xa4\xb6", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa4", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa4\xd0", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa4\xd0\xa4", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4", "strict", u"\uc4d4"),
        ("\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4x", "strict", u"\uc4d4x"),
        ("a\xa4\xd4\xa4\xb6\xa4", "replace", u"a\ufffd"),
        ("\xa4\xd4\xa3\xb6\xa4\xd0\xa4\xd4", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa3\xd0\xa4\xd4", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa4\xd0\xa3\xd4", "strict", None),
        ("\xa4\xd4\xa4\xff\xa4\xd0\xa4\xd4", "replace", u"\ufffd"),
        ("\xa4\xd4\xa4\xb6\xa4\xff\xa4\xd4", "replace", u"\ufffd"),
        ("\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xff", "replace", u"\ufffd"),
        ("\xc1\xc4", "strict", u"\uc894"),
    )
示例#2
0
class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'shift_jis'
    tstring = test_multibytecodec_support.load_teststring('shift_jis')
    codectests = shiftjis_commonenctests + (
        (b"\\\x7e", "strict", "\\\x7e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
    )
示例#3
0
class Test_HZ(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'hz'
    tstring = test_multibytecodec_support.load_teststring('hz')
    codectests = (
        # test '~\n' (3 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
         b'~{NpJ)l6HK!#~}Bye.\n',
         'strict',
         u'This sentence is in ASCII.\n'
         u'The next sentence is in GB.'
         u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         u'Bye.\n'),
        # test '~\n' (4 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~\n'
         b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
         b'Bye.\n',
         'strict',
         u'This sentence is in ASCII.\n'
         u'The next sentence is in GB.'
         u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         u'Bye.\n'),
        # invalid bytes
        (b'ab~cd', 'replace', u'ab\uFFFDd'),
        (b'ab\xffcd', 'replace', u'ab\uFFFDcd'),
        (b'ab~{\x81\x81\x41\x44~}cd', 'replace', u'ab\uFFFD\uFFFD\u804Acd'),
    )
示例#4
0
class Test_Big5(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'big5'
    tstring = test_multibytecodec_support.load_teststring('big5')
    if RESYNC_FASTER:
        # Version from CPython 3.6 where \0x80\0x80 is two invalid sequences.
        # Java 8 agrees with this interpretation.
        codectests = (
            # invalid bytes
            (b"abc\x80\x80\xc1\xc4", "strict", None),
            (b"abc\xc8", "strict", None),
            (b"abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\ufffd\u8b10"),
            (b"abc\x80\x80\xc1\xc4\xc8", "replace",
             u"abc\ufffd\ufffd\u8b10\ufffd"),
            (b"abc\x80\x80\xc1\xc4", "ignore", u"abc\u8b10"),
        )
    else:
        # Standard version of test from CPython 2.7
        codectests = (
            # invalid bytes
            ("abc\x80\x80\xc1\xc4", "strict", None),
            ("abc\xc8", "strict", None),
            ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u8b10"),
            ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u8b10\ufffd"),
            ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u8b10"),
        )
示例#5
0
class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
                         unittest.TestCase):
    encoding = 'euc_jp'
    tstring = test_multibytecodec_support.load_teststring('euc_jp')
    codectests = eucjp_commontests + (
        ("\xa1\xc0\\", "strict", u"\uff3c\\"),
        (u"\xa5", "strict", "\x5c"),
        (u"\u203e", "strict", "\x7e"),
    )
示例#6
0
class Test_ISO2022_KR(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'iso2022_kr'
    tstring = test_multibytecodec_support.load_teststring('iso2022_kr')
    codectests = COMMON_CODEC_TESTS + (
        (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'), )

    # iso2022_kr.txt cannot be used to test "chunk coding": the escape
    # sequence is only written on the first line
    def test_chunkcoding(self):
        pass
class Test_Big5(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'big5'
    tstring = test_multibytecodec_support.load_teststring('big5')
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
    )
示例#8
0
class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'johab'
    tstring = test_multibytecodec_support.load_teststring('johab')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\ucd27"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\ucd27\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\ucd27"),
    )
示例#9
0
class Test_CP949(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'cp949'
    tstring = test_multibytecodec_support.load_teststring('cp949')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\uc894"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\uc894\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\uc894"),
    )
示例#10
0
class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'gb2312'
    tstring = test_multibytecodec_support.load_teststring('gb2312')
    codectests = (
        # invalid bytes
        ("abc\x81\x81\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x81\x81\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x81\x81\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x81\x81\xc1\xc4", "ignore", u"abc\u804a"),
        ("\xc1\x64", "strict", None),
    )
示例#11
0
class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'gbk'
    tstring = test_multibytecodec_support.load_teststring('gbk')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        ("\x83\x34\x83\x31", "strict", None),
        (u"\u30fb", "strict", None),
    )
示例#12
0
class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'gb18030'
    tstring = test_multibytecodec_support.load_teststring('gb18030')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        ("abc\x84\x39\x84\x39\xc1\xc4", "replace", u"abc\ufffd\u804a"),
    )
    has_iso10646 = True
示例#13
0
class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'cp932'
    tstring = test_multibytecodec_support.load_teststring('shift_jis')
    codectests = (
        # invalid bytes
        ("abc\x81\x00\x81\x00\x82\x84", "strict", None),
        ("abc\xf8", "strict", None),
        ("abc\x81\x00\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x81\x00\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x81\x00\x82\x84", "ignore", u"abc\uff44"),
        # sjis vs cp932
        ("\\\x7e", "replace", u"\\\x7e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\uff3c\u2225\uff0d"),
    )
示例#14
0
class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'shift_jisx0213'
    tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\x82\x84", "strict", None),
        ("abc\xf8", "strict", None),
        ("abc\x80\x80\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x80\x80\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x80\x80\x82\x84def", "ignore", u"abc\uff44def"),
        # sjis vs cp932
        ("\\\x7e", "replace", u"\xa5\u203e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\x5c\u2016\u2212"),
    )
    xmlcharnametest = (u"\xab\u211c\xbb = \u2329\u1234\u232a",
                       "\x85G&real;\x85Q = &lang;&#4660;&rang;")
示例#15
0
class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
                        unittest.TestCase):
    encoding = 'euc_jisx0213'
    tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u7956"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u7956\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u7956"),
        ("abc\x8f\x83\x83", "replace", u"abc\ufffd"),
        ("\xc1\x64", "strict", None),
        ("\xa1\xc0", "strict", u"\uff3c"),
    )
    xmlcharnametest = (u"\xab\u211c\xbb = \u2329\u1234\u232a",
                       "\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;")
示例#16
0
class Test_ISO2022_JP2(test_multibytecodec_support.TestBase,
                       unittest.TestCase):
    encoding = 'iso2022_jp_2'
    tstring = test_multibytecodec_support.load_teststring('iso2022_jp')
    codectests = COMMON_CODEC_TESTS + ((b'ab\x1BNdef', 'replace', 'abdef'), )