Пример #1
0
    def test_equals(self):
        # Two sets built from the same text are equal; dropping the last
        # code point of the range makes them differ.
        reference = CodepointSet('0000..00FF')
        identical = CodepointSet('0000..00FF')
        one_short = CodepointSet('0000..00FE')
        self.assertEqual(reference, identical)
        self.assertNotEqual(reference, one_short)

        # Comparison with an object that is not a CodepointSet is never true.
        self.assertFalse(reference == 'what?')
Пример #2
0
    def test_parse(self):
        # (input text, expected repr).  The cases cover short hex values,
        # a mix of a single value and a range, and input consisting only of
        # blank, whitespace and comment lines.
        cases = (
            ('A\nBB\n', r"CodepointSet('000A\n00BB')"),
            ('AAA\nBBB..CCC\n', r"CodepointSet('0AAA\n0BBB..0CCC')"),
            ('\n  \n # comment  \n   \n', "CodepointSet('')"),
        )
        for text, expected_repr in cases:
            self.assertEqual(repr(CodepointSet(text)), expected_repr)
Пример #3
0
    def test_malformed_range(self):
        # Each input below must make the constructor raise ValueError.

        # The range end (0000) is lower than its start (0002).
        self.assertRaises(ValueError, CodepointSet, '0002..0000\n0001')

        # The same range appears twice.
        self.assertRaises(
            ValueError, CodepointSet, '0000..0001\n0000..0001\n0002')

        # 0002 appears both on its own and as the start of a range.
        self.assertRaises(ValueError, CodepointSet, '0000\n0002\n0002..0004')

        # 110000 is above U+10FFFF, the last Unicode code point.
        self.assertRaises(ValueError, CodepointSet, '110000')

        # 'G' is not a hexadecimal digit.
        self.assertRaises(ValueError, CodepointSet, '0000\n000G')
Пример #4
0
    def test_len(self):
        # (input text, expected number of code points in the set)
        cases = (
            ('0000\n', 1),
            ('0000..0001\n', 2),
            ('0000\n0001\n0002', 3),
            ('0000\n0002', 2),
            ('10000..10FFFF', 0x10FFFF - 0x10000 + 1),
        )
        for text, expected_size in cases:
            self.assertEqual(len(CodepointSet(text)), expected_size)
Пример #5
0
 def test_even_odd(self):
     # Fill the set with every even value below 10000, one per line, then
     # check that membership matches evenness for every value in that span.
     even_lines = ["%04X" % value for value in range(0, 10000, 2)]
     cps = CodepointSet('\n'.join(even_lines))
     for value in range(10000):
         is_even = (value % 2) == 0
         self.assertEqual(value in cps, is_even)
Пример #6
0
    def test_contains(self):
        # Membership is probed for -1 through 3, which brackets every small
        # set used below on both sides.
        probes = range(-1, 4)

        single = CodepointSet('0000\n')
        self.assertEqual([cp in single for cp in probes],
                         [False, True, False, False, False])

        self.assertNotIn(0x10FFFF, single)

        pair_range = CodepointSet('0000..0001\n')
        self.assertEqual([cp in pair_range for cp in probes],
                         [False, True, True, False, False])

        three_singles = CodepointSet('0000\n0001\n0002')
        self.assertEqual([cp in three_singles for cp in probes],
                         [False, True, True, True, False])

        with_gap = CodepointSet('0000\n0002')
        self.assertEqual([cp in with_gap for cp in probes],
                         [False, True, False, True, False])

        # The last valid code point is a member; the value after it is not.
        supplementary = CodepointSet('10000..10FFFF')
        self.assertIn(0x10FFFF, supplementary)
        self.assertNotIn(0x110000, supplementary)
Пример #7
0
    def test_repr(self):
        # (input text, expected repr).  Covers the empty set, a single value,
        # ranges inside and outside the four-digit span, and ranges that
        # start at or just below 10000.
        cases = (
            ('', "CodepointSet('')"),
            ('0000', "CodepointSet('0000')"),
            ('0000..00FF', "CodepointSet('0000..00FF')"),
            ('0001..FFFF\n100000..10FFFF',
             r"CodepointSet('0001..FFFF\n100000..10FFFF')"),
            ('FFFF..1FFFF', "CodepointSet('FFFF..1FFFF')"),
            ('10000..1FFFF', "CodepointSet('10000..1FFFF')"),
            ('FFFE\n10000..1FFFF', r"CodepointSet('FFFE\n10000..1FFFF')"),
        )
        for text, expected_repr in cases:
            self.assertEqual(repr(CodepointSet(text)), expected_repr)
Пример #8
0
# http://www.unicode.org/Public/10.0.0/ucd/DerivedCoreProperties.txt
# Derived Property: Default_Ignorable_Code_Point
#
# Set of the code points listed under that property in the data file above.
# Each line of the literal holds either one hexadecimal code point or an
# inclusive `first..last` range.
_DEFAULT_IGNORABLE = CodepointSet('''
00AD
034F
061C
115F..1160
17B4..17B5
180B..180D
180E
200B..200F
202A..202E
2060..2064
2065
2066..206F
3164
FE00..FE0F
FEFF
FFA0
FFF0..FFF8
1BCA0..1BCA3
1D173..1D17A
E0000
E0001
E0002..E001F
E0020..E007F
E0080..E00FF
E0100..E01EF
E01F0..E0FFF
''')
# Import-time sanity check: the entries above must add up to exactly 4173
# code points, so an edit to the table that changes the total is caught as
# soon as the module loads (unless Python runs with -O, which strips asserts).
assert len(_DEFAULT_IGNORABLE) == 4173
Пример #9
0
    def test_coalesce(self):
        # Consecutive single code points compare equal to the range that
        # spans them.
        merged_run = CodepointSet('0000..0002')
        self.assertEqual(CodepointSet('0000\n0001\n0002'), merged_run)

        # A single code point directly followed by a range compares equal to
        # one longer range; the isolated 0000 stays separate.
        merged_tail = CodepointSet('0000\n0002..0004')
        self.assertEqual(CodepointSet('0000\n0002\n0003..0004'), merged_tail)