Exemplo n.º 1
0
 def test_combining_chars(self):
     """Decode "1" followed by U+20DE with the "ja" decoder and expect "1"."""
     cases = [
         ("\u0031\u20de", "1"),
     ]
     decoder = Unihandecoder(lang="ja")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
    def test_long_japanese_text(self):
        """Decode one long Japanese passage with the "ja" decoder in a single call."""
        decoder = Unihandecoder(lang="ja")

        expected = "Nihonkokumin ha, Seitou ni Senkyo sareta Kokkai niokeru Daihyousha wo Tsuuji te Koudou shi, wareratowarerano Shison notameni, Shokokumin tono Kyouwa niyoru Seika to, waga Kuni Zendo niwatatsute Jiyuu nomotarasu Keitaku wo Kakuho shi, Seifu no Koui niyotsute Futatabi Sensou no Sanka ga Okoru kotononaiyaunisurukotowo Ketsui shi, kokoni Shuken ga Kokumin ni Sonsu rukotowo Sengen shi, kono Kenpou wo Kakuteisu ru. somosomo Kokusei ha, Kokumin no Genshuku na Shintaku niyorumonodeatsute, sono Ken'i ha Kokumin ni Yurai shi, sono Kenryoku ha Kokumin no Daihyousha gakorewo Koushi shi, sono Fukuri ha Kokumin gakorewo Kyouju suru. koreha Jinruifuhen no Genri deari, kono Kenpou ha, kakaru Genri ni Motozuku monodearu. wareraha, koreni Hansu ru Issai no Kenpou, Hourei Oyobi Shouchoku wo Haijo suru."
        source_text = "日本国民は、正当に選挙された国会における代表者を通じて行動し、われらとわれらの子孫のために、諸国民との協和による成果と、わが国全土にわたつて自由のもたらす恵沢を確保し、政府の行為によつて再び戦争の惨禍が起ることのないやうにすることを決意し、ここに主権が国民に存することを宣言し、この憲法を確定する。そもそも国政は、国民の厳粛な信託によるものであつて、その権威は国民に由来し、その権力は国民の代表者がこれを行使し、その福利は国民がこれを享受する。これは人類普遍の原理であり、この憲法は、かかる原理に基くものである。われらは、これに反する一切の憲法、法令及び詔勅を排除する。"

        romanised = decoder.decode(source_text)
        self.assertEqual(romanised, expected)
Exemplo n.º 3
0
 def test_combining_chars(self):
     """Decode "1" followed by U+20DE with the "ja" decoder and expect "1"."""
     cases = [
         ("\u0031\u20de", "1"),
     ]
     decoder = Unihandecoder(lang="ja")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 4
0
    def test_specific_bmp(self):
        """Decode hand-picked BMP strings with a default decoder and compare each result."""
        cases = [
            ("Hello, World!", "Hello, World!"),
            ("'\"\r\n", "'\"\r\n"),
            ("ČŽŠčžš", "CZSczs"),
            ("ア", "a"),
            ("α", "a"),
            ("а", "a"),
            ('ch\xe2teau', "chateau"),
            ('vi\xf1edos', "vinedos"),
            ("\u5317\u4EB0", "Bei Jing "),
            ("Efficient", "Efficient"),

            # Table that doesn't exist
            ('\ua500', ''),

            # Table that has fewer than 256 entries
            ('\u1eff', ''),

            # Mark area
            ("\u210a", "g"),  # gram mark
        ]
        decoder = Unihandecoder()
        for text, expected in cases:
            # assertEqual replaces the deprecated failUnlessEqual alias.
            self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 5
0
 def test_ja_itaiji(self):
     """Decode a name written with an itaiji character using the "ja" decoder."""
     decoder = Unihandecoder(lang="ja")
     cases = [
         (u"森鷗外", 'Mori Ougai'),  # itaiji
     ]
     for source, expected in cases:
         romanised = decoder.decode(source)
         self.assertEqual(romanised, expected)
    def test_long_japanese_text(self):
        """Decode one long Japanese passage with the "ja" decoder in a single call."""
        decoder = Unihandecoder(lang="ja")

        expected = "Nihonkokumin ha, Seitou ni Senkyo sareta Kokkai niokeru Daihyousha wo Tsuuji te Koudou shi, wareratowarerano Shison notameni, Shokokumin tono Kyouwa niyoru Seika to, waga Kuni Zendo niwatatsute Jiyuu nomotarasu Keitaku wo Kakuho shi, Seifu no Koui niyotsute Futatabi Sensou no Sanka ga Okoru kotononaiyaunisurukotowo Ketsui shi, kokoni Shuken ga Kokumin ni Sonsu rukotowo Sengen shi, kono Kenpou wo Kakuteisu ru. somosomo Kokusei ha, Kokumin no Genshuku na Shintaku niyorumonodeatsute, sono Ken'i ha Kokumin ni Yurai shi, sono Kenryoku ha Kokumin no Daihyousha gakorewo Koushi shi, sono Fukuri ha Kokumin gakorewo Kyouju suru. koreha Jinruifuhen no Genri deari, kono Kenpou ha, kakaru Genri ni Motozuku monodearu. wareraha, koreni Hansu ru Issai no Kenpou, Hourei Oyobi Shouchoku wo Haijo suru."
        source_text = "日本国民は、正当に選挙された国会における代表者を通じて行動し、われらとわれらの子孫のために、諸国民との協和による成果と、わが国全土にわたつて自由のもたらす恵沢を確保し、政府の行為によつて再び戦争の惨禍が起ることのないやうにすることを決意し、ここに主権が国民に存することを宣言し、この憲法を確定する。そもそも国政は、国民の厳粛な信託によるものであつて、その権威は国民に由来し、その権力は国民の代表者がこれを行使し、その福利は国民がこれを享受する。これは人類普遍の原理であり、この憲法は、かかる原理に基くものである。われらは、これに反する一切の憲法、法令及び詔勅を排除する。"

        romanised = decoder.decode(source_text)
        self.assertEqual(romanised, expected)
Exemplo n.º 7
0
 def test_squared_chars(self):
     """Decode squared Katakana compatibility characters with the "ja" decoder."""
     cases = [
         (u"\u3301", "alpha"),  # combined Alpha in Katakana
         (u"\u3302", "ampere"),  # combined Ampere in Katakana
         (u"\u3304", "inning"),
         (u"\u3306", "won"),  # combined Won in Katakana
         (u"\u3307", "escudo"),
         (u"\u3308", "acre"),  # combined Acre in Katakana
         (u"\u3309", "ounce"),  # combined ounce in Katakana
         (u"\u330a", "ohm"),  # combined Ohm in Katakana
         (u"\u3349", "milli"),  # milli in Katakana
         (u"\u3314", "kilo"),  # kilo in Katakana
         (u"\u3315", "kilogram"),  # kilo gram in Katakana
         (u"\u3316", "kilometer"),  # kilo metre in Katakana
         (u"\u3322", "centi"),  # centi in Katakana
         (u"\u334d", "meter"),  # metre in Katakana
         (u"\u3318", "gram"),  # gram in Katakana
         (u"\u3327", "ton"),  # ton in Katakana
         (u"\u3303", "are"),  # are in Katakana
         (u"\u3336", "hectare"),  # hect-are in Katakana
         (u"\u337f", "Inc."),  # kabusiki kaisha in Katakana
     ]
     decoder = Unihandecoder(lang="ja")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 8
0
 def test_yue(self):
     """Decode a Chinese place name with the "yue" decoder."""
     decoder = Unihandecoder(lang="yue")
     cases = [
         ('香港', 'Hoeng Gong '),
     ]
     for source, expected in cases:
         romanised = decoder.decode(source)
         self.assertEqual(romanised, expected)
Exemplo n.º 9
0
    def test_specific_bmp(self):
        """Decode hand-picked BMP strings with the "zh" decoder and compare each result."""
        decoder = Unihandecoder(lang="zh")

        cases = [
            ("Hello, World!", "Hello, World!"),
            ("'\"\r\n", "'\"\r\n"),
            ("ČŽŠčžš", "CZSczs"),
            ("\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7",
             " !C/PS\u005c$?Y=|SS"),
            ("\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af",
             "\u0022(c)a<<!(r)-"),
            ("ア", "a"),
            ("α", "a"),
            ("а", "a"),
            ('ch\xe2teau', "chateau"),
            ('vi\xf1edos', "vinedos"),
            ("\u5317\u4EB0", "Bei Jing "),
            ("Efficient", "Efficient"),

            # Table that doesn't exist
            ('\ua500', ''),

            # Table that has fewer than 256 entries
            ('\u1eff', ''),

            # Mark area
            ("\u210a", "g"),  # gram mark
        ]

        for source, expected in cases:
            romanised = decoder.decode(source)
            self.assertEqual(romanised, expected)
Exemplo n.º 10
0
 def test_ja_itaiji(self):
     """Decode a name written with an itaiji character using the "ja" decoder."""
     decoder = Unihandecoder(lang="ja")
     cases = [
         ("森鷗外", 'Mori Ougai'),  # itaiji
     ]
     for source, expected in cases:
         romanised = decoder.decode(source)
         self.assertEqual(romanised, expected)
Exemplo n.º 11
0
 def test_squared_chars(self):
     """Decode squared Katakana compatibility characters with the "ja" decoder."""
     cases = [
         (u"\u3301", "alpha"),  # combined Alpha in Katakana
         (u"\u3302", "ampere"),  # combined Ampere in Katakana
         (u"\u3304", "inning"),
         (u"\u3306", "won"),  # combined Won in Katakana
         (u"\u3307", "escudo"),
         (u"\u3308", "acre"),  # combined Acre in Katakana
         (u"\u3309", "ounce"),  # combined ounce in Katakana
         (u"\u330a", "ohm"),  # combined Ohm in Katakana
         (u"\u3349", "milli"),  # milli in Katakana
         (u"\u3314", "kilo"),  # kilo in Katakana
         (u"\u3315", "kilogram"),  # kilo gram in Katakana
         (u"\u3316", "kilometer"),  # kilo metre in Katakana
         (u"\u3322", "centi"),  # centi in Katakana
         (u"\u334d", "meter"),  # metre in Katakana
         (u"\u3318", "gram"),  # gram in Katakana
         (u"\u3327", "ton"),  # ton in Katakana
         (u"\u3303", "are"),  # are in Katakana
         (u"\u3336", "hectare"),  # hect-are in Katakana
         (u"\u337f", "Inc."),  # kabusiki kaisha in Katakana
     ]
     decoder = Unihandecoder(lang="ja")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 12
0
 def test_mac_japanese_pua(self):
     """Decode a sequence starting with the U+F862 private-use code point ("ja")."""
     cases = [
         ("\uF862\u6709\u9650\u4F1A\u793E", "Yuugengaisha"),
     ]
     decoder = Unihandecoder(lang="ja")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 13
0
    def test_mathematical_digits(self):
        """Check that U+1D7CE..U+1D7FF decode to ASCII digits.

        The range holds 5 consecutive sequences of 0-9, so the expected
        digit is the offset from U+1D7CE modulo 10.
        """
        decoder = Unihandecoder()
        for n in range(0x1d7ce, 0x1d800):
            expected = chr(ord('0') + (n - 0x1d7ce) % 10)
            decoded = decoder.decode(chr(n))

            # assertEqual replaces the deprecated failUnlessEqual alias.
            self.assertEqual(decoded, expected)
Exemplo n.º 14
0
 def test_compatibility_composite(self):
     """Decode compatibility characters (U+FB01, U+2075) with the "zh" decoder."""
     cases = [
         (u"\ufb01", "fi"),
         (u"\u0032\u2075", "25"),
     ]
     decoder = Unihandecoder(lang="zh")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 15
0
 def test_combining_chars(self):
     """Decode "1" followed by U+20DE with the "ja" decoder and expect "1"."""
     cases = [
         # roman number "1" wrapped with solid square
         (u"\u0031\u20de", "1"),
     ]
     decoder = Unihandecoder(lang="ja")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 16
0
 def test_compatibility_composite(self):
     """Decode compatibility characters (U+FB01, U+2075) with the "zh" decoder."""
     cases = [
         (u"\ufb01", "fi"),
         (u"\u0032\u2075", "25"),
     ]
     decoder = Unihandecoder(lang="zh")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 17
0
 def test_mathematical_digits(self):
     """Check that U+1D7CE..U+1D7FF decode to ASCII digits.

     The range holds 5 consecutive sequences of 0-9, so the expected
     digit is the offset from U+1D7CE modulo 10.
     """
     decoder = Unihandecoder()
     for n in range(0x1d7ce, 0x1d800):
         expected = chr(ord('0') + (n - 0x1d7ce) % 10)
         decoded = decoder.decode(chr(n))

         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoded, expected)
Exemplo n.º 18
0
 def test_mac_japanese_pua(self):
     """Decode a sequence starting with the U+F862 private-use code point ("ja")."""
     cases = [
         ("\uF862\u6709\u9650\u4F1A\u793E", "Yuugengaisha"),
     ]
     decoder = Unihandecoder(lang="ja")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 19
0
 def test_zh(self):
     """Decode a Chinese sentence with the "zh" decoder."""
     cases = [
         ('\u660e\u5929\u660e\u5929\u7684\u98ce\u5439',
          'Ming Tian Ming Tian De Feng Chui '),
     ]
     decoder = Unihandecoder(lang="zh")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 20
0
 def test_vn(self):
     """Decode accented Latin text and a Han string with the "vn" decoder."""
     cases = [
         ('Ng\xe0y mai gi\xf3 th\u1ed5i v\xe0o ng\xe0y mai',
          'Ngay mai gio thoi vao ngay mai'),
         ("\u660e\u5929\u660e\u5929\u7684\u98ce\u5439",
          'Minh Tian Minh Tian De Feng Xuy '),
     ]
     decoder = Unihandecoder(lang="vn")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 21
0
 def test_combining_chars(self):
     """Decode "1" followed by U+20DE with the "ja" decoder and expect "1"."""
     cases = [
         # roman number "1" wrapped with solid square
         (u"\u0031\u20de", "1"),
     ]
     decoder = Unihandecoder(lang="ja")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 22
0
 def test_yue(self):
     """Decode a Chinese place name with the "yue" decoder."""
     decoder = Unihandecoder(lang="yue")
     cases = [
         (u'香港', 'Hoeng Gong '),
     ]
     for source, expected in cases:
         romanised = decoder.decode(source)
         self.assertEqual(romanised, expected)
Exemplo n.º 23
0
 def test_kana(self):
     """Decode every code point in range 0x3000..0x30fe with the "ja" decoder.

     Only checks that decoding does not raise.  Failing code points are
     collected and reported through an assertion; previously they were
     only printed, so the test could never fail.
     """
     decoder = Unihandecoder(lang="ja")
     failed = []
     # NOTE(review): the upper bound excludes U+30FF -- confirm that is intended.
     for n in xrange(0x3000, 0x30ff):
         try:
             decoder.decode(unichr(n))
         except Exception:  # narrowed from a bare except
             failed.append("%02x" % n)
     self.assertEqual(failed, [], "catch error at " + ", ".join(failed))
Exemplo n.º 24
0
 def test_decomposed_form(self):
     """Decode base characters followed by combining marks with the "ja" decoder."""
     cases = [
         ("\u0041\u0301", "A"),
         ("\u0061\u0323\u0302", "a"),
         ("\u304B\u3099", "ga"),
     ]
     decoder = Unihandecoder(lang="ja")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 25
0
 def test_decomposed_form(self):
     """Decode base characters followed by combining marks with the "ja" decoder."""
     cases = [
         ("\u0041\u0301", "A"),
         ("\u0061\u0323\u0302", "a"),
         ("\u304B\u3099", "ga"),
     ]
     decoder = Unihandecoder(lang="ja")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 26
0
 def test_kana(self):
     """Decode every code point in range 0x3000..0x30fe with the "ja" decoder.

     Only checks that decoding does not raise.  Failing code points are
     collected and reported through an assertion; previously they were
     only printed, so the test could never fail.
     """
     decoder = Unihandecoder(lang="ja")
     failed = []
     # NOTE(review): the upper bound excludes U+30FF -- confirm that is intended.
     for n in range(0x3000, 0x30ff):
         try:
             decoder.decode(six.unichr(n))
         except Exception:  # narrowed from a bare except
             failed.append("%02x" % n)
     self.assertEqual(failed, [], "catch error at " + ", ".join(failed))
Exemplo n.º 27
0
 def test_bmp(self):
     """Decode every BMP code point (0..0xFFFF) with the "zh" decoder.

     Only checks that decoding does not raise.  Failing code points are
     collected and reported through an assertion instead of being printed.
     """
     # unichr() does not exist on Python 3; the old bare except swallowed
     # the resulting NameError, so nothing was actually decoded there.
     try:
         to_char = unichr
     except NameError:
         to_char = chr

     decoder = Unihandecoder(lang="zh")
     failed = []
     for n in range(0, 0x10000):
         try:
             decoder.decode(to_char(n))
         except Exception:  # narrowed from a bare except
             failed.append("%02x" % n)
     self.assertEqual(failed, [], "catch error at " + ", ".join(failed))
Exemplo n.º 28
0
 def test_zh(self):
     """Decode Chinese samples with the "zh" decoder."""
     cases = [
         (u"\u660e\u5929\u660e\u5929\u7684\u98ce\u5439",
          'Ming Tian Ming Tian De Feng Chui '),
         (u"馮", "Feng "),
     ]
     decoder = Unihandecoder(lang="zh")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 29
0
 def test_bmp(self):
     """Decode every BMP code point (0..0xFFFF) with the "zh" decoder.

     Only checks that decoding does not raise.  Failing code points are
     collected and reported through an assertion; previously they were
     only printed, so the test could never fail.
     """
     decoder = Unihandecoder(lang="zh")
     failed = []
     for n in xrange(0, 0x10000):
         try:
             decoder.decode(unichr(n))
         except Exception:  # narrowed from a bare except
             failed.append("%02x" % n)
     self.assertEqual(failed, [], "catch error at " + ", ".join(failed))
Exemplo n.º 30
0
 def test_zh(self):
     """Decode Chinese samples with the "zh" decoder."""
     cases = [
         (u"\u660e\u5929\u660e\u5929\u7684\u98ce\u5439",
          'Ming Tian Ming Tian De Feng Chui '),
         (u"馮", "Feng "),
     ]
     decoder = Unihandecoder(lang="zh")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 31
0
 def test_kr(self):
     """Decode a Hangul sentence and a Han string with the "kr" decoder."""
     cases = [
         (u'\ub0b4\uc77c\uc740 \ub0b4\uc77c \ubc14\ub78c\uc774 \ubd84\ub2e4',
          'naeileun naeil barami bunda'),
         (u"\u660e\u5929\u660e\u5929\u7684\u98ce\u5439",
          'Myeng Chen Myeng Chen Cek Feng Chwi '),
     ]
     decoder = Unihandecoder(lang="kr")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 32
0
 def test_ja(self):
     """Decode a Japanese sentence and a Chinese string with the "ja" decoder."""
     cases = [
         ('\u660e\u65e5\u306f\u660e\u65e5\u306e\u98a8\u304c\u5439\u304f',
          'Ashita ha Ashita no Kaze ga Fuku'),
         ("\u660e\u5929\u660e\u5929\u7684\u98ce\u5439",
          'Mei Tenmei Ten Teki Sui'),
     ]
     decoder = Unihandecoder(lang="ja")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 33
0
 def test_ja(self):
     """Decode a Japanese sentence and a Chinese string with the "ja" decoder."""
     cases = [
         ('\u660e\u65e5\u306f\u660e\u65e5\u306e\u98a8\u304c\u5439\u304f',
          'Ashita ha Ashita no Kaze ga Fuku'),
         ("\u660e\u5929\u660e\u5929\u7684\u98ce\u5439",
          'Mei Tenmei Ten Teki Sui'),
     ]
     decoder = Unihandecoder(lang="ja")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 34
0
 def test_zh(self):
     """Decode Chinese samples, including U+3400, with the "zh" decoder."""
     decoder = Unihandecoder(lang="zh")
     cases = [
         ("\u3400", 'Qiu '),
         ("\u660e\u5929\u660e\u5929\u7684\u98ce\u5439",
          'Ming Tian Ming Tian De Feng Chui '),
         ("馮", "Feng "),
     ]
     for source, expected in cases:
         romanised = decoder.decode(source)
         self.assertEqual(romanised, expected)
Exemplo n.º 35
0
 def test_decomposed_form(self):
     """Decode base characters followed by combining marks with the "ja" decoder."""
     cases = [
         (u"\u0041\u0301", "A"),  # "A" with accent mark
         (u"\u0061\u0323\u0302", "a"),  # "a" with accent marks
         (u"\u30AB\u3099", "ga"),  # "ガ" coded in decomposed form as ' カ゛ '
         (u"\u304B\u3099", "ga"),  # "が" coded in decomposed form as ' か゛ '
     ]
     decoder = Unihandecoder(lang="ja")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 36
0
 def test_zh(self):
     """Decode Chinese samples, including U+3400, with the "zh" decoder."""
     decoder = Unihandecoder(lang="zh")
     cases = [
         ("\u3400", 'Qiu '),
         ("\u660e\u5929\u660e\u5929\u7684\u98ce\u5439",
          'Ming Tian Ming Tian De Feng Chui '),
         ("馮", "Feng "),
     ]
     for source, expected in cases:
         romanised = decoder.decode(source)
         self.assertEqual(romanised, expected)
Exemplo n.º 37
0
 def test_decomposed_form(self):
     """Decode base characters followed by combining marks with the "ja" decoder."""
     cases = [
         (u"\u0041\u0301", "A"),  # "A" with accent mark
         (u"\u0061\u0323\u0302", "a"),  # "a" with accent marks
         (u"\u30AB\u3099", "ga"),  # "ガ" coded in decomposed form as ' カ゛ '
         (u"\u304B\u3099", "ga"),  # "が" coded in decomposed form as ' か゛ '
     ]
     decoder = Unihandecoder(lang="ja")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 38
0
 def test_vn(self):
     """Decode accented Latin text and a Han string with the "vn" decoder."""
     cases = [
         (u'Ng\xe0y mai gi\xf3 th\u1ed5i v\xe0o ng\xe0y mai',
          'Ngay mai gio thoi vao ngay mai'),
         (u"\u660e\u5929\u660e\u5929\u7684\u98ce\u5439",
          'Minh Tian Minh Tian De Feng Xuy '),
     ]
     decoder = Unihandecoder(lang="vn")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 39
0
 def test_kr(self):
     """Decode a Hangul sentence and a Han string with the "kr" decoder."""
     cases = [
         (u'\ub0b4\uc77c\uc740 \ub0b4\uc77c \ubc14\ub78c\uc774 \ubd84\ub2e4',
          'naeileun naeil barami bunda'),
         (u"\u660e\u5929\u660e\u5929\u7684\u98ce\u5439",
          'Myeng Chen Myeng Chen Cek Feng Chwi '),
     ]
     decoder = Unihandecoder(lang="kr")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 40
0
    def test_specific_bmp(self):
        """Decode hand-picked BMP strings with the "zh" decoder and compare each result."""
        cases = [
            (u"Hello, World!", "Hello, World!"),
            (u"'\"\r\n", "'\"\r\n"),
            (u"ČŽŠčžš", "CZSczs"),
            (u"\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7",
             u" !C/PS\u005c$?Y=|SS"),
            (u"\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af",
             u"\u0022(c)a<<!(r)-"),
            (u"ア", "a"),
            (u"α", "a"),
            (u"а", "a"),
            (u'ch\xe2teau', "chateau"),
            (u'vi\xf1edos', "vinedos"),
            (u"\u5317\u4EB0", "Bei Jing "),
            (u"Efficient", "Efficient"),

            # Table that doesn't exist
            (u'\ua500', ''),

            # Table that has fewer than 256 entries
            (u'\u1eff', ''),

            # Mark area
            (u"\u210a", "g"),  # gram mark
        ]

        decoder = Unihandecoder(lang="zh")
        for text, expected in cases:
            # assertEqual replaces the deprecated failUnlessEqual alias.
            self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 41
0
 def test_mac_japanese_pua(self):
     """Decode a U+F862-prefixed sequence and circled kanji with the "ja" decoder."""
     cases = [
         (u"\uF862\u6709\u9650\u4F1A\u793E", "Yuugengaisha"),  # Adobe CID 8321
         (u"\u5927\u20dd", "Dai "),  # "大" with circle
         (u"\u5c0f\u20dd", "Shou "),  # "小" with circle
         (u"\u63a7\u20dd", "Hikae "),  # "控" with circle
     ]
     decoder = Unihandecoder(lang="ja")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 42
0
 def test_ja(self):
     """Decode Japanese and Chinese samples with the "ja" decoder."""
     cases = [
         (u'\u660e\u65e5\u306f\u660e\u65e5\u306e\u98a8\u304c\u5439\u304f',
          'Ashita ha Ashita no Kaze ga Fuku'),
         (u"\u660e\u5929\u660e\u5929\u7684\u98ce\u5439",
          'Mei Tenmei Ten Teki Sui'),
         (u"馮", "Fuu"),  # Fuu in human's name, Hyou in another case
     ]
     decoder = Unihandecoder(lang="ja")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 43
0
 def test_ja(self):
     """Decode Japanese and Chinese samples with the "ja" decoder."""
     cases = [
         (u'\u660e\u65e5\u306f\u660e\u65e5\u306e\u98a8\u304c\u5439\u304f',
          'Ashita ha Ashita no Kaze ga Fuku'),
         (u"\u660e\u5929\u660e\u5929\u7684\u98ce\u5439",
          'Mei Tenmei Ten Teki Sui'),
         (u"馮", "Fuu"),  # Fuu in human's name, Hyou in another case
     ]
     decoder = Unihandecoder(lang="ja")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 44
0
 def test_mac_japanese_pua(self):
     """Decode a U+F862-prefixed sequence and circled kanji with the "ja" decoder."""
     cases = [
         (u"\uF862\u6709\u9650\u4F1A\u793E", "Yuugengaisha"),  # Adobe CID 8321
         (u"\u5927\u20dd", "Dai "),  # "大" with circle
         (u"\u5c0f\u20dd", "Shou "),  # "小" with circle
         (u"\u63a7\u20dd", "Hikae "),  # "控" with circle
     ]
     decoder = Unihandecoder(lang="ja")
     for text, expected in cases:
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 45
0
    def test_mathematical_digits(self):
        """Check that U+1D7CE..U+1D7FF decode to ASCII digits ("zh" decoder).

        The range holds 5 consecutive sequences of 0-9, so the expected
        digit is the offset from U+1D7CE modulo 10.  The test is skipped
        when sys.maxunicode is below 0x1d800.
        """
        if sys.maxunicode < 0x1d800:
            # Report a real skip instead of printing and silently passing.
            self.skipTest("skip test because of Narrow Python")

        decoder = Unihandecoder(lang="zh")
        for n in xrange(0x1d7ce, 0x1d800):
            expected = chr(ord('0') + (n - 0x1d7ce) % 10)
            decoded = decoder.decode(unichr(n))

            # assertEqual replaces the deprecated failUnlessEqual alias.
            self.assertEqual(decoded, expected)
Exemplo n.º 46
0
    def test_specific_ext(self):
        """Decode hand-picked non-BMP strings with the "zh" decoder."""
        cases = [
            # Non-BMP character
            ('\U0001d5a0', 'A'),

            # Mathematical
            ('\U0001d5c4\U0001d5c6/\U0001d5c1', 'km/h'),
        ]
        decoder = Unihandecoder(lang="zh")
        for text, expected in cases:
            # assertEqual replaces the deprecated failUnlessEqual alias.
            self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 47
0
    def test_mathematical_digits(self):
        """Check that U+1D7CE..U+1D7FF decode to ASCII digits ("zh" decoder).

        The range holds 5 consecutive sequences of 0-9, so the expected
        digit is the offset from U+1D7CE modulo 10.  The test is skipped
        when sys.maxunicode is below 0x1d800.
        """
        if sys.maxunicode < 0x1d800:
            # Report a real skip instead of printing and silently passing.
            self.skipTest("skip test because of Narrow Python")

        decoder = Unihandecoder(lang="zh")
        for n in xrange(0x1d7ce, 0x1d800):
            expected = chr(ord('0') + (n - 0x1d7ce) % 10)
            decoded = decoder.decode(unichr(n))

            # assertEqual replaces the deprecated failUnlessEqual alias.
            self.assertEqual(decoded, expected)
Exemplo n.º 48
0
    def test_specific_ext(self):
        """Decode hand-picked non-BMP strings with the "zh" decoder."""
        cases = [
            # Non-BMP character
            ('\U0001d5a0', 'A'),

            # Mathematical
            ('\U0001d5c4\U0001d5c6/\U0001d5c1', 'km/h'),
        ]
        decoder = Unihandecoder(lang="zh")
        for text, expected in cases:
            # assertEqual replaces the deprecated failUnlessEqual alias.
            self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 49
0
    def test_specific_bmp(self):
        """Decode hand-picked BMP strings with the "zh" decoder and compare each result."""
        cases = [
            (u"Hello, World!", "Hello, World!"),
            (u"'\"\r\n", "'\"\r\n"),
            (u"ČŽŠčžš", "CZSczs"),
            (u"ア", "a"),
            (u"α", "a"),
            (u"а", "a"),
            (u'ch\xe2teau', "chateau"),
            (u'vi\xf1edos', "vinedos"),
            (u"\u5317\u4EB0", "Bei Jing "),
            (u"Efficient", "Efficient"),

            # Table that doesn't exist
            (u'\ua500', ''),

            # Table that has fewer than 256 entries
            (u'\u1eff', ''),

            # Mark area
            (u"\u210a", "g"),  # gram mark
        ]

        decoder = Unihandecoder(lang="zh")
        for text, expected in cases:
            # assertEqual replaces the deprecated failUnlessEqual alias.
            self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 50
0
    def test_specific_supplementary(self):
        """Decode hand-picked non-BMP strings with the "zh" decoder.

        The test is skipped when sys.maxunicode is below 0x1d6a4.
        """
        if sys.maxunicode < 0x1d6a4:
            # Report a real skip instead of printing and silently passing.
            self.skipTest("skip test because of Narrow Python")

        cases = [
            # Non-BMP character
            (u'\U0001d5a0', 'A'),

            # Mathematical
            (u'\U0001d5c4\U0001d5c6/\U0001d5c1', 'km/h'),
        ]
        decoder = Unihandecoder(lang="zh")
        for text, expected in cases:
            # assertEqual replaces the deprecated failUnlessEqual alias.
            self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 51
0
    def test_specific_supplementary(self):
        """Decode hand-picked non-BMP strings with the "zh" decoder.

        The test is skipped when sys.maxunicode is below 0x1d6a4.
        """
        if sys.maxunicode < 0x1d6a4:
            # Report a real skip instead of printing and silently passing.
            self.skipTest("skip test because of Narrow Python")

        cases = [
            # Non-BMP character
            (u'\U0001d5a0', 'A'),

            # Mathematical
            (u'\U0001d5c4\U0001d5c6/\U0001d5c1', 'km/h'),
        ]
        decoder = Unihandecoder(lang="zh")
        for text, expected in cases:
            # assertEqual replaces the deprecated failUnlessEqual alias.
            self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 52
0
 def test_mathematical_latin(self):
     """Check that U+1D400..U+1D6A3 decode to plain Latin letters.

     The range holds 13 consecutive sequences of A-Z, a-z with some
     code points undefined.  Empty results are only counted (24 are
     expected); their positions are not checked.
     """
     decoder = Unihandecoder()
     empty = 0
     for n in range(0x1d400, 0x1d6a4):
         # Each run of 52 code points is 26 capitals followed by 26 smalls.
         first_letter = 'A' if n % 52 < 26 else 'a'
         expected = chr(ord(first_letter) + n % 26)
         decoded = decoder.decode(chr(n))

         if decoded:
             # assertEqual replaces the deprecated failUnlessEqual alias.
             self.assertEqual(decoded, expected)
         else:
             empty += 1

     self.assertEqual(empty, 24)
Exemplo n.º 53
0
    def test_mathematical_latin(self):
        """Check that U+1D400..U+1D6A3 decode to plain Latin letters.

        The range holds 13 consecutive sequences of A-Z, a-z with some
        code points undefined.  Empty results are only counted (24 are
        expected); their positions are not checked.
        """
        decoder = Unihandecoder()
        empty = 0
        for n in range(0x1d400, 0x1d6a4):
            # Each run of 52 code points is 26 capitals followed by 26 smalls.
            first_letter = 'A' if n % 52 < 26 else 'a'
            expected = chr(ord(first_letter) + n % 26)
            decoded = decoder.decode(chr(n))

            if decoded:
                # assertEqual replaces the deprecated failUnlessEqual alias.
                self.assertEqual(decoded, expected)
            else:
                empty += 1

        self.assertEqual(empty, 24)
Exemplo n.º 54
0
    def test_ja(self):
        """Decode Japanese samples, including regression cases, with the "ja" decoder."""
        cases = [
            (u'\u660e\u65e5\u306f\u660e\u65e5\u306e\u98a8\u304c\u5439\u304f',
             'Ashita ha Ashita no Kaze ga Fuku'),
            (u"\u660e\u5929\u660e\u5929\u7684\u98ce\u5439",
             'Mei Tenmei Ten Teki Sui'),
            (u"馮", "Fuu"),  # Fuu in human's name, Hyou in another case
            # regression tests
            (u'\u30d0\u30cb\u30fc\u3061\u3083\u3093\u3061\u306e\u30b7\u30e3\u30ef\u30fc\u30ce\u30ba\u30eb\u306e\u5148\u7aef',
             "bani-chanchinoshawa-nozuruno Sentan"),
            # Hiragana n, Namisen, katakana-middle-dot x3, Touten, katakana KE,
            # katakana TSU, Hiragana small TU, ASCII !, fullwidth ! (U+FF01).
            (u'\u3093\u301c\u30fb\u30fb\u30fb\u3002\u30b1\u30c4\u3063!\uff01',
             "n ~ .... ketsutsu !!"),
            (u"ページへようこそ", 'pageheyoukoso'),  # test for u30fc
        ]
        decoder = Unihandecoder(lang="ja")
        for text, expected in cases:
            # assertEqual replaces the deprecated failUnlessEqual alias.
            self.assertEqual(decoder.decode(text), expected)
Exemplo n.º 55
0
 def test_ja(self):
     """Decode Japanese samples, including regression cases, with the "ja" decoder."""
     decoder = Unihandecoder(lang="ja")
     cases = [
         ('\u660e\u65e5\u306f\u660e\u65e5\u306e\u98a8\u304c\u5439\u304f',
          'Ashita ha Ashita no Kaze ga Fuku'),
         ("\u660e\u5929\u660e\u5929\u7684\u98ce\u5439",
          'Mei Tenmei Ten Teki Sui'),
         # Disabled case: (u"馮", "Fuu") -- Fuu in human's name, Hyou in another case
         # regression tests
         # test for u30fc
         ('\u30d0\u30cb\u30fc\u3061\u3083\u3093\u3061\u306e\u30b7\u30e3\u30ef\u30fc\u30ce\u30ba\u30eb\u306e\u5148\u7aef',
          "bani- chanchino shawa-nozuru no Sentan"),
         # Hiragana n, Namisen, katakana-middle-dot x3, Touten, katakana KE,
         # katakana TSU, Hiragana small TU, ASCII !, fullwidth ! (U+FF01)
         ('\u3093\u301c\u30fb\u30fb\u30fb\u3002\u30b1\u30c4\u3063!\uff01',
          "n ~.... ketsu tsu !!"),
         ("ページへようこそ", 'pe-ji heyoukoso'),
         ("森鴎外", 'Mori Ougai'),  # no-itaiji
         ("森鷗外", 'Mori Ougai'),  # itaiji
         ("する。", 'suru.'),  # end mark test
     ]
     for source, expected in cases:
         romanised = decoder.decode(source)
         self.assertEqual(romanised, expected)
Exemplo n.º 56
0
def do_slugify(instance, tagName, adjust=None):
    """Slugify ``tagName`` via ``TagBase.slugify``, romanising non-ASCII names first.

    ``instance`` must be an instance of a ``TagBase`` descendant; it is
    passed to ``TagBase.slugify`` as the bound object, together with
    ``adjust``.

    Non-ASCII names are decoded with ``Unihandecoder(lang="zh")``.  The
    "zh" tables were chosen first because they also cope with Japanese
    text.  For '明天明天的风吹明日は明日の風が吹く' the decoders give:

        zh: 'Ming Tian Ming Tian De Feng Chui Ming Ri haMing Ri noFeng gaChui ku'
        ja: 'Mei Tenmei Ten Teki Sui Ashita ha Ashita no Kaze ga Fuku'
    """
    if is_ascii(tagName):
        # Common case: nothing to romanise.
        return TagBase.slugify(instance, tagName, adjust)

    # Rarely reached, so the decoder is imported only when needed.
    from unihandecode import Unihandecoder

    romanised = Unihandecoder(lang="zh").decode(tagName)
    return TagBase.slugify(instance, romanised, adjust)
Exemplo n.º 57
0
    def test_mathematical_latin(self):
        """Check that U+1D400..U+1D6A3 decode to plain Latin letters ("zh" decoder).

        The range holds 13 consecutive sequences of A-Z, a-z with some
        code points undefined.  Empty results are only counted (24 are
        expected); their positions are not checked.  The test is skipped
        when sys.maxunicode is below 0x1d6a4.
        """
        if sys.maxunicode < 0x1d6a4:
            # Report a real skip instead of printing and silently passing.
            self.skipTest("skip test because of Narrow Python")

        decoder = Unihandecoder(lang="zh")
        empty = 0
        for n in xrange(0x1d400, 0x1d6a4):
            # Each run of 52 code points is 26 capitals followed by 26 smalls.
            first_letter = 'A' if n % 52 < 26 else 'a'
            expected = chr(ord(first_letter) + n % 26)
            decoded = decoder.decode(unichr(n))

            if decoded:
                # assertEqual replaces the deprecated failUnlessEqual alias.
                self.assertEqual(decoded, expected)
            else:
                empty += 1

        self.assertEqual(empty, 24)
Exemplo n.º 58
0
    def test_mathematical_latin(self):
        """Check that U+1D400..U+1D6A3 decode to plain Latin letters ("zh" decoder).

        The range holds 13 consecutive sequences of A-Z, a-z with some
        code points undefined.  Empty results are only counted (24 are
        expected); their positions are not checked.  The test is skipped
        when sys.maxunicode is below 0x1d6a4.
        """
        if sys.maxunicode < 0x1d6a4:
            # Report a real skip instead of printing and silently passing.
            self.skipTest("skip test because of Narrow Python")

        decoder = Unihandecoder(lang="zh")
        empty = 0
        for n in xrange(0x1d400, 0x1d6a4):
            # Each run of 52 code points is 26 capitals followed by 26 smalls.
            first_letter = 'A' if n % 52 < 26 else 'a'
            expected = chr(ord(first_letter) + n % 26)
            decoded = decoder.decode(unichr(n))

            if decoded:
                # assertEqual replaces the deprecated failUnlessEqual alias.
                self.assertEqual(decoded, expected)
            else:
                empty += 1

        self.assertEqual(empty, 24)
Exemplo n.º 59
0
 def test_ascii(self):
     """Check that each ASCII character (0..127) decodes to itself."""
     decoder = Unihandecoder()
     for code in range(0, 128):
         char = chr(code)
         # assertEqual replaces the deprecated failUnlessEqual alias.
         self.assertEqual(decoder.decode(char), char)
Exemplo n.º 60
0
"""
Tag parsing and printing

Loosely based on django-taggit and django-tagging
"""
from __future__ import unicode_literals

from django.utils.encoding import force_text

import unicodedata
try:
    from unihandecode import Unihandecoder
    unidecoder = Unihandecoder(lang="ja")
except ImportError: # pragma: no cover - tests simulate this
    unidecoder = None

from tagulous.constants import COMMA, SPACE, QUOTE, DOUBLE_QUOTE, TREE


###############################################################################
####### Tag name parse and render
###############################################################################

def parse_tags(tag_string, max_count=0, space_delimiter=True):
    """
    Tag parser

    Rules without quotes:
        If a comma is present it's used as the delimiter
        Otherwise space is used as the delimiter
        Spaces at the start and end of tags are ignored