示例#1
0
 def test_circled_latin(self):
     """Check the 26 code points starting at U+24D0 against 'a'..'z'."""
     # 1 sequence of a-z
     for offset, expected in enumerate('abcdefghijklmnopqrstuvwxyz'):
         circled = chr(0x24d0 + offset)
         self.assertEqual(unidecode(circled), expected)
示例#2
0
 def test_mathematical_digits(self):
     """Check U+1D7CE..U+1D7FF against a repeating '0'..'9' cycle."""
     # 5 consecutive sequences of 0-9
     first = 0x1d7ce
     for offset in range(0x1d800 - first):
         expected = chr(ord('0') + offset % 10)
         actual = unidecode(chr(first + offset))
         self.assertEqual(actual, expected)
示例#3
0
 def handle_charref(self, name):
     """Append the character denoted by a numeric character reference.

     ``name`` is parsed as hexadecimal when it starts with ``x`` or ``X``
     (the prefix is stripped first) and as decimal otherwise. A reference
     that cannot be converted to a character is skipped instead of raising.
     """
     try:
         if name.startswith(('x', 'X')):
             char = chr(int(name[1:], 16))
         else:
             char = chr(int(name))
     except (ValueError, OverflowError):
         # int() raises ValueError on malformed digits; chr() raises
         # ValueError above 0x10FFFF and OverflowError for huge integers.
         char = None
     if char is not None:
         self._append(char)
示例#4
0
def ENTITIES_REPLACEMENT(matchobj):
    """Return the replacement text for one matched entity.

    Group 1, when non-empty, is read as a hexadecimal code point; group 2,
    when non-empty, as a decimal code point; otherwise group 3 is looked up
    in ``ENTITIES_TO_CHARACTERS``. Whenever the entity cannot be resolved
    (unknown name, or a number that is not a valid code point) the matched
    text is returned unchanged.
    """
    hex_digits = matchobj.group(1)
    dec_digits = matchobj.group(2)
    if hex_digits or dec_digits:
        try:
            if hex_digits:
                return chr(int(hex_digits, 16))
            return chr(int(dec_digits))
        except (ValueError, OverflowError):
            # chr() rejects values above 0x10FFFF (ValueError) and integers
            # too large for a C int (OverflowError); keep the source text,
            # mirroring the unknown-name fallback below.
            return matchobj.group(0)
    try:
        return ENTITIES_TO_CHARACTERS[matchobj.group(3)]
    except KeyError:
        return matchobj.group(0)
示例#5
0
    def test_mathematical_latin(self):
        """Check U+1D400..U+1D6A3 against repeating A-Z, a-z alphabets."""
        # 13 consecutive sequences of A-Z, a-z with some codepoints
        # undefined. We just count the undefined ones and don't check
        # positions.
        missing = 0
        for codepoint in range(0x1d400, 0x1d6a4):
            actual = unidecode(chr(codepoint))
            if not actual:
                missing += 1
                continue
            # 0x1d400 is a multiple of 52, so the plain modulo of the code
            # point gives the position inside each A-Z, a-z run.
            first_letter = 'A' if codepoint % 52 < 26 else 'a'
            expected = chr(ord(first_letter) + codepoint % 26)
            self.assertEqual(actual, expected)

        self.assertEqual(missing, 24)
示例#6
0
 def fixup(m):
     """Return the character for the matched entity, or the match itself.

     Text starting with ``&#`` is treated as a numeric character reference
     (hexadecimal after ``x``/``X``, decimal otherwise); anything else is
     looked up by name in ``name2codepoint``. The first two characters (or
     one, for named entities) and the last character of the match are
     treated as delimiters. Unresolvable input is returned unchanged.
     """
     text = m.group(0)
     if text[:2] == "&#":
         # character reference
         digits = text[2:-1]
         try:
             if digits[:1] in ("x", "X"):
                 return chr(int(digits[1:], 16))
             return chr(int(digits))
         except (ValueError, OverflowError):
             # ValueError: malformed digits or code point above 0x10FFFF;
             # OverflowError: integer too large for chr().
             pass
     else:
         # named entity
         try:
             return chr(name2codepoint[text[1:-1]])
         except KeyError:
             pass
     return text  # leave as is
示例#7
0
 def fixup(m):
     """Return the character for the matched entity, or the match itself.

     Text starting with ``&#`` is treated as a numeric character reference
     (hexadecimal after ``x``/``X``, decimal otherwise); anything else is
     looked up by name in ``name2codepoint``. The first two characters (or
     one, for named entities) and the last character of the match are
     treated as delimiters. Unresolvable input is returned unchanged.
     """
     text = m.group(0)
     if text[:2] == "&#":
         # character reference
         digits = text[2:-1]
         try:
             if digits[:1] in ("x", "X"):
                 return chr(int(digits[1:], 16))
             return chr(int(digits))
         except (ValueError, OverflowError):
             # ValueError: malformed digits or code point above 0x10FFFF;
             # OverflowError: integer too large for chr().
             pass
     else:
         # named entity
         try:
             return chr(name2codepoint[text[1:-1]])
         except KeyError:
             pass
     return text  # leave as is
示例#8
0
 def test_bmp(self):
     """Run unidecode over every code point below U+10000."""
     # Just check that it doesn't throw an exception
     for character in map(chr, range(0x10000)):
         unidecode(character)
示例#9
0
 def test_ascii(self):
     """Check that each of the 128 ASCII characters maps to itself."""
     for code in range(128):
         character = chr(code)
         self.assertEqual(unidecode(character), character)