示例#1
0
 def test_filter_transliterate_replace_codepoint_length(self):
     """
     Run the ``filter_transliterate`` helper on strings made of
     ``x``, a single codepoint of increasing magnitude, and ``z``,
     passing ``[u"xaz"]`` as the expected output each time.

     Codepoints above 0x10000 are skipped on Python 2 narrow builds.
     """
     def check(codepoint):
         # build the one-element input list around the codepoint under test
         text = u"x" + gf.safe_unichr(codepoint) + u"z"
         self.filter_transliterate([text], [u"xaz"])

     for codepoint in (0x0008, 0x0088, 0x0888, 0x8888):
         check(codepoint)

     # NOTE Python 2 narrow builds cannot handle codepoints above 0x10000 correctly
     if gf.is_py2_narrow_build():
         return

     for codepoint in (0x88888, 0x108888):
         check(codepoint)
示例#2
0
 def _build_map(self):
     """
     Load the rules of the map file located at ``self.file_path``.

     ``self.trans_map`` is reset to an empty dictionary, then the file
     is read as UTF-8, tabs are replaced by spaces, and every line
     that is neither a comment nor blank is stripped and passed to
     ``self._process_map_rule``.
     """
     if gf.is_py2_narrow_build():
         self.log_warn(
             u"Running on a Python 2 narrow build: be aware that Unicode chars above 0x10000 cannot be replaced correctly."
         )
     self.trans_map = {}
     with io.open(self.file_path, "r", encoding="utf-8") as map_file:
         text = map_file.read().replace(u"\t", u" ")
         for raw_line in text.splitlines():
             # comment lines are detected BEFORE stripping,
             # so only a "#" in the very first column marks a comment
             if raw_line.startswith(u"#"):
                 continue
             rule = raw_line.strip()
             # skip lines that are blank once stripped
             if not rule:
                 continue
             self._process_map_rule(rule)
示例#3
0
 def transliterate(self, string):
     """
     Return a copy of ``string`` where each character that is a key
     of ``self.trans_map`` is replaced by the associated value.

     Characters that are not keys of the map are copied unchanged.

     :param string: the Unicode string to be transliterated
     :returns: the transliterated Unicode string
     """
     #
     # NOTE on Python 2 narrow builds,
     #      iterating over the string is not 100% correct
     #      because a Unicode character above 0x10000
     #      is "split" into two characters,
     #      and hence it cannot be found as a key of the map
     #
     if gf.is_py2_narrow_build():
         self.log_warn(
             u"Running on a Python 2 narrow build: be aware that Unicode chars above 0x10000 cannot be replaced correctly."
         )
     # Only a missing key means "keep the character as it is":
     # dict.get() expresses exactly that, without a bare "except"
     # that would also hide unrelated errors (e.g., a missing map
     # or a KeyboardInterrupt).
     trans_map = self.trans_map
     return u"".join([trans_map.get(char, char) for char in string])