def test_equals(self):
    # Sets built from the same text compare equal; a set that is one code
    # point shorter does not.
    reference = CodepointSet('0000..00FF')
    same = CodepointSet('0000..00FF')
    shorter = CodepointSet('0000..00FE')
    self.assertEqual(reference, same)
    self.assertNotEqual(reference, shorter)

    # Non-CodepointSet always not-equal.  The comparison is spelled with
    # `==` so that the equality operator itself is what gets exercised.
    equals_plain_string = reference == 'what?'
    self.assertFalse(equals_plain_string)
def test_parse(self):
    # Each case pairs the text handed to the constructor with the repr()
    # expected from the resulting set:
    #   * short hex values come back zero-padded to four digits and a
    #     trailing newline is accepted;
    #   * ranges are padded the same way;
    #   * blank lines, whitespace-only lines and a '#' comment line
    #     produce an empty set.
    cases = (
        ('A\nBB\n', r"CodepointSet('000A\n00BB')"),
        ('AAA\nBBB..CCC\n', r"CodepointSet('0AAA\n0BBB..0CCC')"),
        ('\n \n # comment \n \n', "CodepointSet('')"),
    )
    for text, expected_repr in cases:
        parsed = CodepointSet(text)
        self.assertEqual(repr(parsed), expected_repr)
def test_malformed_range(self):
    # Every input below must be refused with ValueError.
    rejected_inputs = (
        # A range whose end precedes its start.
        '0002..0000\n0001',
        # The same range listed twice.
        '0000..0001\n0000..0001\n0002',
        # A range starting at a code point that was already listed.
        '0000\n0002\n0002..0004',
        # One past 0x10FFFF.
        '110000',
        # 'G' is not a hexadecimal digit.
        '0000\n000G',
    )
    for text in rejected_inputs:
        with self.assertRaises(ValueError):
            CodepointSet(text)
def test_len(self):
    # len() counts individual code points, not lines or ranges.
    cases = (
        ('0000\n', 1),
        ('0000..0001\n', 2),
        ('0000\n0001\n0002', 3),
        ('0000\n0002', 2),
        ('10000..10FFFF', 0x10FFFF - 0x10000 + 1),
    )
    for text, expected_count in cases:
        codepoints = CodepointSet(text)
        self.assertEqual(len(codepoints), expected_count)
def test_even_odd(self):
    # Build a set holding every even value below 10000, one value per
    # line, then check that membership matches parity for each value in
    # that span.
    even_lines = ["%04X" % cp for cp in range(0, 10000, 2)]
    cps = CodepointSet('\n'.join(even_lines))
    for cp in range(10000):
        is_even = (cp % 2) == 0
        self.assertEqual(cp in cps, is_even)
def test_contains(self):
    # Membership is probed at -1, 0, 1, 2 and 3; each expected list holds
    # one flag per probe, in that order.
    probes = range(-1, 4)

    def membership(codepoints):
        return [cp in codepoints for cp in probes]

    single = CodepointSet('0000\n')
    self.assertEqual(membership(single),
                     [False, True, False, False, False])
    self.assertNotIn(0x010FFFF, single)

    pair = CodepointSet('0000..0001\n')
    self.assertEqual(membership(pair),
                     [False, True, True, False, False])

    triple = CodepointSet('0000\n0001\n0002')
    self.assertEqual(membership(triple),
                     [False, True, True, True, False])

    gapped = CodepointSet('0000\n0002')
    self.assertEqual(membership(gapped),
                     [False, True, False, True, False])

    # The last value of the range is a member; the value right after it
    # is not.
    upper = CodepointSet('10000..10FFFF')
    self.assertIn(0x10FFFF, upper)
    self.assertNotIn(0x110000, upper)
def test_repr(self):
    # Each case pairs constructor text with the expected repr().  Inputs
    # are ordinary strings (real newlines); expectations that span
    # several entries are raw strings because repr() shows the newline
    # as the two characters backslash-n.
    cases = (
        ('', "CodepointSet('')"),
        ('0000', "CodepointSet('0000')"),
        ('0000..00FF', "CodepointSet('0000..00FF')"),
        ('0001..FFFF\n100000..10FFFF',
         r"CodepointSet('0001..FFFF\n100000..10FFFF')"),
        ('FFFF..1FFFF', "CodepointSet('FFFF..1FFFF')"),
        ('10000..1FFFF', "CodepointSet('10000..1FFFF')"),
        ('FFFE\n10000..1FFFF', r"CodepointSet('FFFE\n10000..1FFFF')"),
    )
    for text, expected_repr in cases:
        codepoints = CodepointSet(text)
        self.assertEqual(repr(codepoints), expected_repr)
# http://www.unicode.org/Public/10.0.0/ucd/DerivedCoreProperties.txt # Derived Property: Default_Ignorable_Code_Point _DEFAULT_IGNORABLE = CodepointSet(''' 00AD 034F 061C 115F..1160 17B4..17B5 180B..180D 180E 200B..200F 202A..202E 2060..2064 2065 2066..206F 3164 FE00..FE0F FEFF FFA0 FFF0..FFF8 1BCA0..1BCA3 1D173..1D17A E0000 E0001 E0002..E001F E0020..E007F E0080..E00FF E0100..E01EF E01F0..E0FFF ''') assert len(_DEFAULT_IGNORABLE) == 4173
def test_coalesce(self):
    # Adjacent entries must compare equal to the merged range that covers
    # them; an entry separated by a gap stays on its own.
    cases = (
        ('0000\n0001\n0002', '0000..0002'),
        ('0000\n0002\n0003..0004', '0000\n0002..0004'),
    )
    for scattered, merged in cases:
        self.assertEqual(CodepointSet(scattered), CodepointSet(merged))