def test_iterate(self):
    """Iterating a subset yields code points; iter_characters() yields characters."""
    expected_chars = ['a', 'b', 'c', 'd']
    letters = UnicodeSubset('a-d')

    # Plain iteration gives integers, so compare against the ord() of each character.
    self.assertListEqual(list(iter(letters)), [ord(ch) for ch in expected_chars])
    self.assertListEqual(list(letters.iter_characters()), expected_chars)
def test_max_and_min(self):
    """min() and max() return the lowest and highest code point of a subset.

    The expected maximum for the ``(89, 151)`` item is 150, i.e. the upper
    bound of a tuple item is not itself part of the subset.
    """
    sources = (
        [10, 51, (89, 151), 90],
        [0, 2, (80, 201), 10000],
        [1],
    )
    expected_bounds = ((10, 150), (0, 10000), (1, 1))

    # Build every subset before asserting anything, then check them in order.
    subsets = [UnicodeSubset(items) for items in sources]
    for subset, bounds in zip(subsets, expected_bounds):
        self.assertEqual((min(subset), max(subset)), bounds)
def test_difference_update_method(self):
    """difference_update() accepts a character-class string or a list of ranges."""
    operands = (
        'a-c',
        [(ord('a'), ord('c') + 1)],
    )
    for removed in operands:
        # Start from a fresh 'a-z' subset for each operand form.
        alphabet = UnicodeSubset('a-z')
        alphabet.difference_update(removed)
        self.assertEqual(alphabet, UnicodeSubset('d-z'))
def test_creation(self):
    """A subset can be built from a list of items, a string or another subset."""
    spec = ((0, 9), 11, 12, (14, 32), (33, sys.maxunicode + 1))
    # Separate list copies are used for the input and the expectation so the
    # comparison never involves the very same list object.
    from_items = UnicodeSubset(list(spec))
    self.assertEqual(from_items, list(spec))

    # String sources: the expected range stops one past the last character.
    self.assertEqual(UnicodeSubset('0-9'), [(ord('0'), ord('9') + 1)])
    self.assertEqual(UnicodeSubset('0-9:'), [(ord('0'), ord(':') + 1)])

    # Building from an existing subset.
    lowercase = UnicodeSubset('a-z')
    self.assertEqual(UnicodeSubset(lowercase), [(ord('a'), ord('z') + 1)])
def test_repr(self):
    """Check code_point_repr(), repr() and str() output, and repr() of a bad subset."""
    backslash_range = (ord('2'), ord('\\') + 1)
    self.assertEqual(code_point_repr(backslash_range), r'2-\\')

    lowercase = UnicodeSubset('a-z')
    self.assertEqual(repr(lowercase), "UnicodeSubset('a-z')")
    self.assertEqual(str(lowercase), "a-z")

    broken = UnicodeSubset((50, 90))
    broken.codepoints.append(sys.maxunicode + 10)  # Invalid subset
    with self.assertRaises(ValueError):
        repr(broken)
def test_reversed(self):
    """reversed() walks the code points of a subset from highest to lowest."""
    subset = UnicodeSubset('0-9ax')

    # The digits '9' down to '0' are code points 57 down to 48.
    digits_descending = list(range(ord('9'), ord('0') - 1, -1))
    expected = [ord('x'), ord('a')] + digits_descending

    self.assertEqual(list(reversed(subset)), expected)
def test_in_operator(self):
    """Membership works for characters and integer code points, before and after update()."""
    subset = UnicodeSubset('0-9a-z')

    for member in ('a', ord('a'), ord('z')):
        self.assertIn(member, subset)

    # A float is expected not to be a member even when it equals a contained code point.
    for outsider in ('/', 'A', ord('A'), ord('}'), float(ord('a')), '.'):
        self.assertNotIn(outsider, subset)

    # Adding '.' must make only that character a member, not its neighbours.
    subset.update('.')
    self.assertIn('.', subset)
    for outsider in ('/', '-'):
        self.assertNotIn(outsider, subset)
def test_xor(self):
    """In-place ``^=`` with a subset, a string and an unsupported operand."""
    # XOR of a subset with itself leaves it empty.
    lowercase = UnicodeSubset('a-z')
    lowercase ^= lowercase
    self.assertEqual(lowercase, UnicodeSubset())

    # Operands fully contained in 'a-z' are simply removed.
    contained_cases = (
        (UnicodeSubset('a-c'), 'd-z'),
        ('a-f', 'g-z'),
    )
    for operand, outcome in contained_cases:
        lowercase = UnicodeSubset('a-z')
        lowercase ^= operand
        self.assertEqual(lowercase, UnicodeSubset(outcome))

    # A bool operand is rejected.
    lowercase = UnicodeSubset('a-z')
    with self.assertRaises(TypeError) as ctx:
        lowercase ^= False
    self.assertIn('unsupported operand type', str(ctx.exception))

    # A partially overlapping operand: the overlap is removed, the rest is added.
    lowercase = UnicodeSubset('a-z')
    lowercase ^= 'A-Za-f'
    self.assertEqual(lowercase, UnicodeSubset('A-Zg-z'))
def test_subtraction(self):
    """Binary ``-`` and in-place ``-=`` with sets, subsets, strings and a bad operand."""
    mixed = UnicodeSubset([0, 2, (80, 200), 10000])
    removed = {2, 120, 121, (150, 260)}
    self.assertEqual(mixed - removed, [0, (80, 120), (122, 150), 10000])

    # In-place subtraction accepts another subset as well as a string.
    for operand in (UnicodeSubset('a-c'), 'a-c'):
        lowercase = UnicodeSubset('a-z')
        lowercase -= operand
        self.assertEqual(lowercase, UnicodeSubset('d-z'))

    # A bool operand is rejected.
    lowercase = UnicodeSubset('a-z')
    with self.assertRaises(TypeError) as ctx:
        lowercase -= False
    self.assertIn('unsupported operand type', str(ctx.exception))
def test_union_and_intersection(self):
    """Binary ``|`` / ``&`` and their in-place forms, including unsupported operands."""
    left = UnicodeSubset([50, (90, 200), 10])
    right = UnicodeSubset([10, 51, (89, 150), 90])
    self.assertEqual(left | right, [10, (50, 52), (89, 200)])
    self.assertEqual(left & right, [10, (90, 150)])

    # In-place union, applied cumulatively to the same subset.
    union_steps = (
        (UnicodeSubset('A-Zfx'), 'A-Za-z'),
        ('0-9', '0-9A-Za-z'),
        ([ord('{'), ord('}')], '0-9A-Za-z{}'),
    )
    merged = UnicodeSubset('a-z')
    for operand, outcome in union_steps:
        merged |= operand
        self.assertEqual(merged, UnicodeSubset(outcome))

    # In-place intersection, applied cumulatively to the same subset.
    intersection_steps = (
        (UnicodeSubset('A-Zfx'), 'fx'),
        ('xyz', 'x'),
    )
    common = UnicodeSubset('a-z')
    for operand, outcome in intersection_steps:
        common &= operand
        self.assertEqual(common, UnicodeSubset(outcome))

    # A bool operand is rejected by both in-place operators.
    target = UnicodeSubset('a-z')
    with self.assertRaises(TypeError) as ctx:
        target |= False
    self.assertIn('unsupported operand type', str(ctx.exception))

    target = UnicodeSubset('a-z')
    with self.assertRaises(TypeError) as ctx:
        target &= False
    self.assertIn('unsupported operand type', str(ctx.exception))
def test_equality(self):
    """An empty subset is not equal to 0.0; equal content compares equal."""
    # The ``==`` operator is used directly so that the equality method itself is exercised.
    empty_equals_zero = UnicodeSubset() == 0.0
    self.assertFalse(empty_equals_zero)

    # 'a-kl-z' is written as two adjacent ranges and is expected to equal 'a-z'.
    single_range = UnicodeSubset('a-z')
    split_range = UnicodeSubset('a-kl-z')
    self.assertEqual(single_range, split_range)
def test_complement(self):
    """complement() yields the items not covered by a subset, up to sys.maxunicode."""
    tail = (91, sys.maxunicode + 1)

    subset = UnicodeSubset((50, 90, 10, 90))
    self.assertEqual(list(subset.complement()), [(0, 10), (11, 50), (51, 90), tail])

    subset.add(11)
    self.assertEqual(list(subset.complement()), [(0, 10), (12, 50), (51, 90), tail])

    subset.add((0, 10))
    self.assertEqual(list(subset.complement()), [(12, 50), (51, 90), tail])

    def merged_categories(keys):
        # Chain the code points of the given Unicode category keys into one subset.
        return UnicodeSubset(chain(*[UNICODE_CATEGORIES[key].codepoints for key in keys]))

    # Categories L, M, N, S together are expected to be the complement of C, P, Z.
    first_group = merged_categories('LMNS')
    second_group = merged_categories('CPZ')
    self.assertEqual(first_group.codepoints,
                     UnicodeSubset(second_group.complement()).codepoints)

    broken = UnicodeSubset((50, 90))
    broken.codepoints.append(70)  # Invalid subset (unordered)
    with self.assertRaises(ValueError) as ctx:
        list(broken.complement())
    self.assertEqual(str(ctx.exception), "unordered code points found in UnicodeSubset('2ZF')")

    # A subset holding only the second-to-last code point: the complement ends
    # with the single remaining code point rather than a range.
    near_top = UnicodeSubset((sys.maxunicode - 1, ))
    self.assertEqual(list(near_top.complement()),
                     [(0, sys.maxunicode - 1), sys.maxunicode])
def test_update_method(self):
    """update() parses escapes and hyphen ranges, and rejects a malformed class."""
    subset = UnicodeSubset()

    # Escaped backslash, then an escaped '$' added to the same subset.
    subset.update('\\\\')
    self.assertListEqual(subset.codepoints, [ord('\\')])
    subset.update('\\$')
    self.assertListEqual(subset.codepoints, [ord('$'), ord('\\')])

    # Ranges ending in a hyphen: a trailing extra hyphen adds nothing new.
    bang_to_hyphen = (ord('!'), ord('-') + 1)
    range_cases = (
        ('!--', [bang_to_hyphen]),
        ('!---', [bang_to_hyphen]),
        ('!--a', [bang_to_hyphen, ord('a')]),
    )
    for pattern, expected in range_cases:
        subset.clear()
        subset.update(pattern)
        self.assertListEqual(subset.codepoints, expected)

    with self.assertRaises(RegexError):
        subset.update('[[')
def test_modify(self):
    """add() and discard() keep the subset ordered and validate their arguments."""
    high_range = (19001, 20001)

    subset = UnicodeSubset()
    for code_point in (50, 90, 10, 90):
        subset.add(code_point)
    self.assertEqual(subset, [10, 50, 90])

    # Values outside 0..sys.maxunicode are rejected.
    for out_of_bounds in (-1, sys.maxunicode + 1):
        with self.assertRaises(ValueError):
            subset.add(out_of_bounds)

    subset.add((100, 20001))
    subset.discard((100, 19001))
    self.assertEqual(subset, [10, 50, 90, high_range])

    subset.add(0)
    subset.discard(1)  # Not a member: expected to leave the subset unchanged
    self.assertEqual(subset, [0, 10, 50, 90, high_range])

    subset.discard(0)
    self.assertEqual(subset, [10, 50, 90, high_range])

    subset.discard((10, 100))
    self.assertEqual(subset, [high_range])

    for item in (20, 19, 30, [30, 33], 30000, 30001):
        subset.add(item)
    self.assertEqual(subset, [(19, 21), (30, 33), high_range, (30000, 30002)])

    # The expected list keeps 22 as a separate item right after (19, 22).
    for item in (22, 21, 22):
        subset.add(item)
    self.assertEqual(subset, [(19, 22), 22, (30, 33), high_range, (30000, 30002)])

    subset.discard((90, 50000))
    self.assertEqual(subset, [(19, 22), 22, (30, 33)])

    for item in (21, 19):
        subset.discard(item)
    self.assertEqual(subset, [20, 22, (30, 33)])

    subset.discard((0, 200))
    self.assertEqual(subset, [])

    # Neither None nor a three-element tuple is a valid argument.
    for malformed in (None, (10, 11, 12)):
        with self.assertRaises(ValueError):
            subset.discard(malformed)