Python load_teststring示例，test.test_multibytecodec_support.load_teststring Python示例

示例#1

0

显示文件

class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'euc_kr'
    tstring = test_multibytecodec_support.load_teststring('euc_kr')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\uc894"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\uc894\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\uc894"),

        # composed make-up sequence errors
        ("\xa4\xd4", "strict", None),
        ("\xa4\xd4\xa4", "strict", None),
        ("\xa4\xd4\xa4\xb6", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa4", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa4\xd0", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa4\xd0\xa4", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4", "strict", u"\uc4d4"),
        ("\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4x", "strict", u"\uc4d4x"),
        ("a\xa4\xd4\xa4\xb6\xa4", "replace", u"a\ufffd"),
        ("\xa4\xd4\xa3\xb6\xa4\xd0\xa4\xd4", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa3\xd0\xa4\xd4", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa4\xd0\xa3\xd4", "strict", None),
        ("\xa4\xd4\xa4\xff\xa4\xd0\xa4\xd4", "replace", u"\ufffd"),
        ("\xa4\xd4\xa4\xb6\xa4\xff\xa4\xd4", "replace", u"\ufffd"),
        ("\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xff", "replace", u"\ufffd"),
        ("\xc1\xc4", "strict", u"\uc894"),
    )

示例#2

0

显示文件

class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'shift_jis'
    tstring = test_multibytecodec_support.load_teststring('shift_jis')
    codectests = shiftjis_commonenctests + (
        (b"\\\x7e", "strict", "\\\x7e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
    )

示例#3

0

显示文件

class Test_HZ(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'hz'
    tstring = test_multibytecodec_support.load_teststring('hz')
    codectests = (
        # test '~\n' (3 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
         b'~{NpJ)l6HK!#~}Bye.\n',
         'strict',
         u'This sentence is in ASCII.\n'
         u'The next sentence is in GB.'
         u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         u'Bye.\n'),
        # test '~\n' (4 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~\n'
         b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
         b'Bye.\n',
         'strict',
         u'This sentence is in ASCII.\n'
         u'The next sentence is in GB.'
         u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         u'Bye.\n'),
        # invalid bytes
        (b'ab~cd', 'replace', u'ab\uFFFDd'),
        (b'ab\xffcd', 'replace', u'ab\uFFFDcd'),
        (b'ab~{\x81\x81\x41\x44~}cd', 'replace', u'ab\uFFFD\uFFFD\u804Acd'),
    )

示例#4

0

显示文件

class Test_Big5(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'big5'
    tstring = test_multibytecodec_support.load_teststring('big5')
    if RESYNC_FASTER:
        # Version from CPython 3.6 where \0x80\0x80 is two invalid sequences.
        # Java 8 agrees with this interpretation.
        codectests = (
            # invalid bytes
            (b"abc\x80\x80\xc1\xc4", "strict", None),
            (b"abc\xc8", "strict", None),
            (b"abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\ufffd\u8b10"),
            (b"abc\x80\x80\xc1\xc4\xc8", "replace",
             u"abc\ufffd\ufffd\u8b10\ufffd"),
            (b"abc\x80\x80\xc1\xc4", "ignore", u"abc\u8b10"),
        )
    else:
        # Standard version of test from CPython 2.7
        codectests = (
            # invalid bytes
            ("abc\x80\x80\xc1\xc4", "strict", None),
            ("abc\xc8", "strict", None),
            ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u8b10"),
            ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u8b10\ufffd"),
            ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u8b10"),
        )

示例#5

0

显示文件

class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
                         unittest.TestCase):
    encoding = 'euc_jp'
    tstring = test_multibytecodec_support.load_teststring('euc_jp')
    codectests = eucjp_commontests + (
        ("\xa1\xc0\\", "strict", u"\uff3c\\"),
        (u"\xa5", "strict", "\x5c"),
        (u"\u203e", "strict", "\x7e"),
    )

示例#6

0

显示文件

class Test_ISO2022_KR(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'iso2022_kr'
    tstring = test_multibytecodec_support.load_teststring('iso2022_kr')
    codectests = COMMON_CODEC_TESTS + (
        (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'), )

    # iso2022_kr.txt cannot be used to test "chunk coding": the escape
    # sequence is only written on the first line
    def test_chunkcoding(self):
        pass

示例#7

0

显示文件

文件： test_codecencodings_tw.py 项目： moussmaw1/python3.0-experiments

class Test_Big5(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'big5'
    tstring = test_multibytecodec_support.load_teststring('big5')
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
    )

示例#8

0

显示文件

class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'johab'
    tstring = test_multibytecodec_support.load_teststring('johab')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\ucd27"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\ucd27\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\ucd27"),
    )

示例#9

0

显示文件

class Test_CP949(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'cp949'
    tstring = test_multibytecodec_support.load_teststring('cp949')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\uc894"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\uc894\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\uc894"),
    )

示例#10

0

显示文件

文件： test_codecencodings_cn.py 项目： heroarthur/Minix

class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'gb2312'
    tstring = test_multibytecodec_support.load_teststring('gb2312')
    codectests = (
        # invalid bytes
        ("abc\x81\x81\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x81\x81\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x81\x81\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x81\x81\xc1\xc4", "ignore", u"abc\u804a"),
        ("\xc1\x64", "strict", None),
    )

示例#11

0

显示文件

文件： test_codecencodings_cn.py 项目： heroarthur/Minix

class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'gbk'
    tstring = test_multibytecodec_support.load_teststring('gbk')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        ("\x83\x34\x83\x31", "strict", None),
        (u"\u30fb", "strict", None),
    )

示例#12

0

显示文件

class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'gb18030'
    tstring = test_multibytecodec_support.load_teststring('gb18030')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        ("abc\x84\x39\x84\x39\xc1\xc4", "replace", u"abc\ufffd\u804a"),
    )
    has_iso10646 = True

示例#13

0

显示文件

class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'cp932'
    tstring = test_multibytecodec_support.load_teststring('shift_jis')
    codectests = (
        # invalid bytes
        ("abc\x81\x00\x81\x00\x82\x84", "strict", None),
        ("abc\xf8", "strict", None),
        ("abc\x81\x00\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x81\x00\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x81\x00\x82\x84", "ignore", u"abc\uff44"),
        # sjis vs cp932
        ("\\\x7e", "replace", u"\\\x7e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\uff3c\u2225\uff0d"),
    )

示例#14

0

显示文件

class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
    encoding = 'shift_jisx0213'
    tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\x82\x84", "strict", None),
        ("abc\xf8", "strict", None),
        ("abc\x80\x80\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x80\x80\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x80\x80\x82\x84def", "ignore", u"abc\uff44def"),
        # sjis vs cp932
        ("\\\x7e", "replace", u"\xa5\u203e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\x5c\u2016\u2212"),
    )
    xmlcharnametest = (u"\xab\u211c\xbb = \u2329\u1234\u232a",
                       "\x85G&real;\x85Q = &lang;&#4660;&rang;")

示例#15

0

显示文件

class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
                        unittest.TestCase):
    encoding = 'euc_jisx0213'
    tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u7956"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u7956\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u7956"),
        ("abc\x8f\x83\x83", "replace", u"abc\ufffd"),
        ("\xc1\x64", "strict", None),
        ("\xa1\xc0", "strict", u"\uff3c"),
    )
    xmlcharnametest = (u"\xab\u211c\xbb = \u2329\u1234\u232a",
                       "\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;")

示例#16

0

显示文件

class Test_ISO2022_JP2(test_multibytecodec_support.TestBase,
                       unittest.TestCase):
    encoding = 'iso2022_jp_2'
    tstring = test_multibytecodec_support.load_teststring('iso2022_jp')
    codectests = COMMON_CODEC_TESTS + ((b'ab\x1BNdef', 'replace', 'abdef'), )