示例#1
0
 def test_iterate(self):
     """Iterating an 'a-d' subset gives code points; iter_characters() gives characters."""
     letters = ['a', 'b', 'c', 'd']
     subset = UnicodeSubset('a-d')

     # Plain iteration is expected to produce integer code points.
     code_points = list(iter(subset))
     self.assertListEqual(code_points, [ord(ch) for ch in letters])

     # iter_characters() is expected to produce the one-character strings.
     characters = list(subset.iter_characters())
     self.assertListEqual(characters, letters)
示例#2
0
 def test_max_and_min(self):
     """min() and max() of a subset must match the expected lowest/highest code points."""
     subsets = [
         UnicodeSubset([10, 51, (89, 151), 90]),
         UnicodeSubset([0, 2, (80, 201), 10000]),
         UnicodeSubset([1]),
     ]
     expected_bounds = [(10, 150), (0, 10000), (1, 1)]

     for subset, bounds in zip(subsets, expected_bounds):
         lowest = min(subset)
         highest = max(subset)
         self.assertEqual((lowest, highest), bounds)
示例#3
0
    def test_difference_update_method(self):
        """difference_update() must give 'd-z' for both a string and a range-list argument."""
        # The same removal expressed as a character class and as a code point range.
        removals = ('a-c', [(ord('a'), ord('c') + 1)])

        for removed in removals:
            letters = UnicodeSubset('a-z')
            letters.difference_update(removed)
            self.assertEqual(letters, UnicodeSubset('d-z'))
示例#4
0
    def test_creation(self):
        """Build subsets from a list, from character-class strings and from another subset."""
        items = [(0, 9), 11, 12, (14, 32), (33, sys.maxunicode + 1)]
        # Pass a copy so the expected value can never alias the constructor argument.
        from_list = UnicodeSubset(list(items))
        self.assertEqual(from_list, items)

        digits = UnicodeSubset('0-9')
        self.assertEqual(digits, [(48, 58)])
        digits_and_colon = UnicodeSubset('0-9:')
        self.assertEqual(digits_and_colon, [(48, 59)])

        lowercase = UnicodeSubset('a-z')
        from_subset = UnicodeSubset(lowercase)
        self.assertEqual(from_subset, [(ord('a'), ord('z') + 1)])
示例#5
0
    def test_repr(self):
        """Check code_point_repr(), repr() and str(), and that repr() rejects a corrupted subset."""
        digit_to_backslash = (ord('2'), ord('\\') + 1)
        self.assertEqual(code_point_repr(digit_to_backslash), r'2-\\')

        lowercase = UnicodeSubset('a-z')
        self.assertEqual(repr(lowercase), "UnicodeSubset('a-z')")
        self.assertEqual(str(lowercase), "a-z")

        # Corrupt the internal list with a value beyond sys.maxunicode.
        corrupted = UnicodeSubset((50, 90))
        corrupted.codepoints.append(sys.maxunicode + 10)
        with self.assertRaises(ValueError):
            repr(corrupted)
示例#6
0
 def test_reversed(self):
     """reversed() on a '0-9ax' subset must give the code points in descending order."""
     subset = UnicodeSubset('0-9ax')

     # 'x', 'a', then the digits counted down from '9' (57) to '0' (48).
     descending_digits = list(range(ord('9'), ord('0') - 1, -1))
     expected = [ord('x'), ord('a')] + descending_digits

     self.assertEqual(list(reversed(subset)), expected)
示例#7
0
    def test_in_operator(self):
        """Check ``in`` / ``not in`` for characters, code points and a float on a '0-9a-z' subset."""
        subset = UnicodeSubset('0-9a-z')

        for member in ('a', ord('a'), ord('z')):
            self.assertIn(member, subset)

        # Includes a float equal to a contained code point, which must not match.
        for outsider in ('/', 'A', ord('A'), ord('}'), float(ord('a'))):
            self.assertNotIn(outsider, subset)

        # Adding '.' must affect only that character, not its neighbours.
        self.assertNotIn('.', subset)
        subset.update('.')
        self.assertIn('.', subset)
        for neighbour in ('/', '-'):
            self.assertNotIn(neighbour, subset)
示例#8
0
    def test_xor(self):
        """Check in-place ``^=`` with itself, a subset, strings and an unsupported operand."""
        letters = UnicodeSubset('a-z')
        letters ^= letters
        self.assertEqual(letters, UnicodeSubset())

        # (right-hand operand, character class expected afterwards)
        for operand, remaining in ((UnicodeSubset('a-c'), 'd-z'), ('a-f', 'g-z')):
            letters = UnicodeSubset('a-z')
            letters ^= operand
            self.assertEqual(letters, UnicodeSubset(remaining))

        letters = UnicodeSubset('a-z')
        with self.assertRaises(TypeError) as ctx:
            letters ^= False
        self.assertIn('unsupported operand type', str(ctx.exception))

        letters = UnicodeSubset('a-z')
        letters ^= 'A-Za-f'
        self.assertEqual(letters, UnicodeSubset('A-Zg-z'))
示例#9
0
    def test_subtraction(self):
        """Check ``-`` with a set and ``-=`` with a subset, a string and an unsupported operand."""
        sparse = UnicodeSubset([0, 2, (80, 200), 10000])
        removed = {2, 120, 121, (150, 260)}
        self.assertEqual(sparse - removed, [0, (80, 120), (122, 150), 10000])

        # Both operand forms must leave the same 'd-z' remainder.
        for operand in (UnicodeSubset('a-c'), 'a-c'):
            letters = UnicodeSubset('a-z')
            letters -= operand
            self.assertEqual(letters, UnicodeSubset('d-z'))

        letters = UnicodeSubset('a-z')
        with self.assertRaises(TypeError) as ctx:
            letters -= False
        self.assertIn('unsupported operand type', str(ctx.exception))
示例#10
0
    def test_union_and_intersection(self):
        """Check ``|``, ``&`` and their in-place forms, plus rejection of a bool operand."""
        left = UnicodeSubset([50, (90, 200), 10])
        right = UnicodeSubset([10, 51, (89, 150), 90])
        self.assertEqual(left | right, [10, (50, 52), (89, 200)])
        self.assertEqual(left & right, [10, (90, 150)])

        # In-place union: the same subset is grown step by step.
        merged = UnicodeSubset('a-z')
        union_steps = (
            (UnicodeSubset('A-Zfx'), 'A-Za-z'),
            ('0-9', '0-9A-Za-z'),
            ([ord('{'), ord('}')], '0-9A-Za-z{}'),
        )
        for operand, expected in union_steps:
            merged |= operand
            self.assertEqual(merged, UnicodeSubset(expected))

        # In-place intersection: the same subset is narrowed step by step.
        common = UnicodeSubset('a-z')
        intersection_steps = (
            (UnicodeSubset('A-Zfx'), 'fx'),
            ('xyz', 'x'),
        )
        for operand, expected in intersection_steps:
            common &= operand
            self.assertEqual(common, UnicodeSubset(expected))

        letters = UnicodeSubset('a-z')
        with self.assertRaises(TypeError) as ctx:
            letters |= False
        self.assertIn('unsupported operand type', str(ctx.exception))

        letters = UnicodeSubset('a-z')
        with self.assertRaises(TypeError) as ctx:
            letters &= False
        self.assertIn('unsupported operand type', str(ctx.exception))
示例#11
0
 def test_equality(self):
     """An empty subset must not equal 0.0; 'a-z' must equal the split form 'a-kl-z'."""
     empty = UnicodeSubset()
     self.assertFalse(empty == 0.0)

     contiguous = UnicodeSubset('a-z')
     split = UnicodeSubset('a-kl-z')
     self.assertEqual(contiguous, split)
示例#12
0
    def test_complement(self):
        """Check complement() on valid subsets and its ValueError on an unordered one."""
        upper_bound = sys.maxunicode + 1
        # Trailing part of the expected complement shared by the first three checks.
        tail = [(51, 90), (91, upper_bound)]

        subset = UnicodeSubset((50, 90, 10, 90))
        self.assertEqual(list(subset.complement()), [(0, 10), (11, 50)] + tail)
        subset.add(11)
        self.assertEqual(list(subset.complement()), [(0, 10), (12, 50)] + tail)
        subset.add((0, 10))
        self.assertEqual(list(subset.complement()), [(12, 50)] + tail)

        # The L/M/N/S categories together must equal the complement of C/P/Z.
        lmns = UnicodeSubset(
            chain(*[UNICODE_CATEGORIES[key].codepoints for key in ('L', 'M', 'N', 'S')]))
        cpz = UnicodeSubset(
            chain(*[UNICODE_CATEGORIES[key].codepoints for key in ('C', 'P', 'Z')]))
        complemented = UnicodeSubset(cpz.complement())
        self.assertEqual(lmns.codepoints, complemented.codepoints)

        # Append an out-of-order value directly to make the subset invalid.
        unordered = UnicodeSubset((50, 90))
        unordered.codepoints.append(70)
        with self.assertRaises(ValueError) as ctx:
            list(unordered.complement())
        message = str(ctx.exception)
        self.assertEqual(message, "unordered code points found in UnicodeSubset('2ZF')")

        near_top = UnicodeSubset((sys.maxunicode - 1, ))
        self.assertEqual(list(near_top.complement()),
                         [(0, sys.maxunicode - 1), sys.maxunicode])
示例#13
0
    def test_update_method(self):
        """Check update() with escaped characters, hyphen ranges and an invalid pattern."""
        subset = UnicodeSubset()
        backslash = ord('\\')

        subset.update('\\\\')
        self.assertListEqual(subset.codepoints, [backslash])
        subset.update('\\$')
        self.assertListEqual(subset.codepoints, [ord('$'), backslash])

        # Each pattern is applied to a freshly cleared subset.
        bang_to_hyphen = (ord('!'), ord('-') + 1)
        hyphen_cases = (
            ('!--', [bang_to_hyphen]),
            ('!---', [bang_to_hyphen]),
            ('!--a', [bang_to_hyphen, ord('a')]),
        )
        for pattern, expected in hyphen_cases:
            subset.clear()
            subset.update(pattern)
            self.assertListEqual(subset.codepoints, expected)

        with self.assertRaises(RegexError):
            subset.update('[[')
示例#14
0
    def test_modify(self):
        """Exercise add() and discard() through a long, order-dependent mutation sequence.

        Every assertion depends on the state left by the preceding calls, so the
        statements must not be reordered.
        """
        subset = UnicodeSubset()
        # 90 is added twice and the values arrive unsorted; the expected result
        # below lists each value once, in ascending order.
        for cp in [50, 90, 10, 90]:
            subset.add(cp)
        self.assertEqual(subset, [10, 50, 90])
        # Values below 0 or above sys.maxunicode must be rejected.
        self.assertRaises(ValueError, subset.add, -1)
        self.assertRaises(ValueError, subset.add, sys.maxunicode + 1)
        # Add a range, then discard most of it: only (19001, 20001) is expected to remain.
        subset.add((100, 20001))
        subset.discard((100, 19001))
        self.assertEqual(subset, [10, 50, 90, (19001, 20001)])
        # Discarding 1, which is not in the subset, is expected to change nothing.
        subset.add(0)
        subset.discard(1)
        self.assertEqual(subset, [0, 10, 50, 90, (19001, 20001)])
        subset.discard(0)
        self.assertEqual(subset, [10, 50, 90, (19001, 20001)])
        # Discarding (10, 100) is expected to remove 10, 50 and 90 in one call.
        subset.discard((10, 100))
        self.assertEqual(subset, [(19001, 20001)])
        # Adjacent single values are expected to be merged into ranges
        # (20 and 19 -> (19, 21); 30000 and 30001 -> (30000, 30002)).
        subset.add(20)
        subset.add(19)
        subset.add(30)
        subset.add([30, 33])
        subset.add(30000)
        subset.add(30001)
        self.assertEqual(subset, [(19, 21), (30, 33), (19001, 20001),
                                  (30000, 30002)])
        # NOTE: the expected list keeps 22 as its own entry after (19, 22)
        # rather than a single (19, 23) range.
        subset.add(22)
        subset.add(21)
        subset.add(22)
        self.assertEqual(subset, [(19, 22), 22, (30, 33), (19001, 20001),
                                  (30000, 30002)])
        # A wide discard is expected to drop both high ranges.
        subset.discard((90, 50000))
        self.assertEqual(subset, [(19, 22), 22, (30, 33)])
        # Removing 21 and 19 from (19, 22) is expected to leave only 20 from that range.
        subset.discard(21)
        subset.discard(19)
        self.assertEqual(subset, [20, 22, (30, 33)])
        # Discarding a range that covers everything left must empty the subset.
        subset.discard((0, 200))
        self.assertEqual(subset, [])

        # Invalid arguments: None and a 3-item tuple must both raise ValueError.
        with self.assertRaises(ValueError):
            subset.discard(None)
        with self.assertRaises(ValueError):
            subset.discard((10, 11, 12))