Exemplo n.º 1
0
    def test_translate(self):
        # Run hypua2jamo.translate over a sample sentence and require the
        # output to equal the hand-written jamo rendering below.
        from hypua2jamo import translate

        # Input sample (tone marks are written as \u302e / \u302f escapes).
        source_text = u'나랏\u302e말\u302f미\u302e 中國귁에\u302e 달아\u302e 문와\u302e로 서르 디\u302e 아니\u302e\u302e'  # noqa
        # Output the translation is required to produce.
        wanted = u'나랏〮말〯ᄊᆞ미〮 中듀ᇰ國귁에〮 달아〮 문ᄍᆞᆼ와〮로 서르 ᄉᆞᄆᆞᆺ디〮 아니〮ᄒᆞᆯᄊᆡ〮'

        self.assertEqual(wanted, translate(source_text))
Exemplo n.º 2
0
    def test_translate_composed(self):
        # Translate the fixture's PUA string and require the result to equal
        # the fixture's composed-jamo string.
        from hypua2jamo import translate

        preface = Fixtures.HunMinPreface
        translated = translate(preface.pua_string)

        self.assertEqual(preface.composed_jamo_string, translated)
Exemplo n.º 3
0
    def test_conversion(self):
        # Translate a sample sentence with hypua2jamo.translate, dump the
        # input / expected / actual values word by word for debugging, then
        # require the whole result to equal the expected string.
        pua = u'나랏\u302e말\u302f미\u302e 中國귁에\u302e 달아\u302e 문와\u302e로 서르 디\u302e 아니\u302e\u302e'

        from hypua2jamo import translate
        expected = u'나랏〮말〯ᄊᆞ미〮 中듀ᇰ國귁에〮 달아〮 문ᄍᆞᆼ와〮로 서르 ᄉᆞᄆᆞᆺ디〮 아니〮ᄒᆞᆯᄊᆡ〮'

        result = translate(pua)

        # Diagnostic output only: each space-separated word is shown both as
        # UTF-8 bytes and as its repr so that mismatches are easy to spot.
        for p, e, r in zip(pua.split(' '), expected.split(' '), result.split(' ')):
            print('P %s %r' % (p.encode('utf-8'), p))
            print('E %s %r' % (e.encode('utf-8'), e))
            print('R %s %r' % (r.encode('utf-8'), r))

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(expected, result)
Exemplo n.º 4
0
    def test_conversion(self):
        # Translate a sample sentence with hypua2jamo.translate, dump the
        # input / expected / actual values word by word for debugging, then
        # require the whole result to equal the expected string.
        pua = u"나랏\u302e말\u302f미\u302e 中國귁에\u302e 달아\u302e 문와\u302e로 서르 디\u302e 아니\u302e\u302e"

        from hypua2jamo import translate

        expected = u"나랏〮말〯ᄊᆞ미〮 中듀ᇰ國귁에〮 달아〮 문ᄍᆞᆼ와〮로 서르 ᄉᆞᄆᆞᆺ디〮 아니〮ᄒᆞᆯᄊᆡ〮"

        result = translate(pua)

        # Diagnostic output only.  A single pre-formatted argument is passed
        # to print so the line parses and prints the same text whether print
        # is a statement (Python 2) or a function (Python 3).
        for p, e, r in zip(pua.split(" "), expected.split(" "), result.split(" ")):
            print("P %s %r" % (p, p))
            print("E %s %r" % (e, e))
            print("R %s %r" % (r, r))

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(expected, result)
Exemplo n.º 5
0
def decode_string(s):
    """Replace private-use characters and web-font markup in *s*.

    The text is first passed through ``hypua2jamo.translate``.  After that,
    every occurrence of a ``<span class="korean-webfont">`` or ``<equ>``
    opening tag followed by one character (or one ``&#xHEX;`` entity with
    upper-case hex digits) and an optional ``</span>`` / ``</equ>`` closing
    tag is replaced:

    * a ``&#xHEX;`` entity is decoded to the character it names;
    * a character in one of the Unicode private-use ranges is replaced by its
      entry in the local mapping table, or by ``<webfont>U+XXXX</webfont>``
      when the table has no entry for it;
    * any other character is kept as is, with the surrounding tags removed.

    The search restarts from the beginning of the string after each
    replacement, so markup that only appears once an earlier replacement has
    been made is processed as well.

    Args:
        s: The text to decode.

    Returns:
        The decoded text.

    Raises:
        ValueError: If a ``&#xHEX;`` entity names a value above 0x10FFFF
            (raised by ``chr``).
    """
    # Hanyang PUA
    s = hypua2jamo.translate(s)

    # Urimalsaem site web font
    # The table and the compiled pattern do not change between iterations,
    # so they are built once per call instead of once per match.
    pua_map = {
        '\uE01D': '\u0254\u0342',  # ɔ͂
        '\uE01E': '\u025B\u0342',  # ɛ͂
        '\uE01F': 'n\u0304',  # n̄
        '\uE020': '𝆑𝆑𝆑',
        '\uE021': '𝆑𝆑',
        '\uE022': '𝆑𝆏',
        '\uE023': '𝆑𝆎',
        '\uE024': '▞',
        '\uE025': '▚',
        '\uE026': '\u3001',  # IDEOGRAPHIC COMMA
        '\uE02C': 'ᅟᅵᇰ',
        '\uE02E': '타ᇦ'
    }
    webfont_re = re.compile(
        '(?:<span class="korean-webfont">|<equ>)(&#x[0-9A-F]{1,6};|.)(?:</span>|</equ>)?')

    while True:
        m = webfont_re.search(s)
        if not m:
            break
        ch = m.group(1)

        # Group 1 is either a hex character reference or a single character.
        if ch.startswith('&#x') and ch.endswith(';'):
            ch = chr(int(ch[3:-1], 16))

        # BMP Private Use Area and the two supplementary private-use planes.
        code = ord(ch)
        if (0xE000 <= code <= 0xF8FF
                or 0xF0000 <= code <= 0xFFFFD
                or 0x100000 <= code <= 0x10FFFD):
            if ch in pua_map:
                ch = pua_map[ch]
            else:
                # Unknown private-use character: leave a visible marker.
                ch = '<webfont>U+%X</webfont>' % code

        # Splice the replacement over the whole match (tags included).
        a, b = m.span(0)
        s = s[:a] + ch + s[b:]
    # TODO: other PUA
    return s
Exemplo n.º 6
0
    def test_jc2p_decode(self):
        def translate(jamo_string):
            decoder = self.make_decoder()
            return decoder.decode(jamo_string, final=True)

        pua = self.OUTPUT_STRING
        jamo = self.INPUT_STRING

        self.assertEqual(pua[:1], translate(jamo[:1]))    # 나
        self.assertEqual(pua[:2], translate(jamo[:2]))    # 랏
        self.assertEqual(pua[:3], translate(jamo[:3]))
        self.assertEqual(pua[:4], translate(jamo[:4]))
        self.assertEqual(pua[:5], translate(jamo[:5]))
        self.assertEqual(pua[:5] + u'\uf7ca', translate(jamo[:6]))
        self.assertEqual(pua[:6], translate(jamo[:7]))
        self.assertEqual(pua[:7], translate(jamo[:8]))
        self.assertEqual(pua[:8], translate(jamo[:9]))
        self.assertEqual(pua[:9], translate(jamo[:10]))

        self.assertEqual(pua[:10], translate(jamo[:11]))  # 中
        self.assertEqual(pua[:10] + u'\uf790', translate(jamo[:12]))
        self.assertEqual(pua[:10] + u'\u1103\u1172', translate(jamo[:13]))
        self.assertEqual(pua[:11], translate(jamo[:14]))
        self.assertEqual(pua[:12], translate(jamo[:15]))  # 國
        self.assertEqual(pua[:13], translate(jamo[:16]))  # 귁
        self.assertEqual(pua[:14], translate(jamo[:17]))  # 에
        self.assertEqual(pua[:15], translate(jamo[:18]))  # u302e
        self.assertEqual(pua[:16], translate(jamo[:19]))  # u0020

        self.assertEqual(pua[:17], translate(jamo[:20]))  # 달
        self.assertEqual(pua[:18], translate(jamo[:21]))  # 아
        self.assertEqual(pua[:19], translate(jamo[:22]))  # u302e
        self.assertEqual(pua[:20], translate(jamo[:23]))  # u0020

        self.assertEqual(pua[:21], translate(jamo[:24]))  # 문
        self.assertEqual(pua[:21] + u'\uf7ea', translate(jamo[:25]))  #
        self.assertEqual(pua[:21] + u'\uf250', translate(jamo[:26]))  #
        self.assertEqual(pua[:22], translate(jamo[:27]))  #
        self.assertEqual(pua[:23], translate(jamo[:28]))  # 와
        self.assertEqual(pua[:24], translate(jamo[:29]))  # u302e
        self.assertEqual(pua[:25], translate(jamo[:30]))  # 로
        self.assertEqual(pua[:26], translate(jamo[:31]))  # u0020

        self.assertEqual(pua[:27], translate(jamo[:32]))  # 서
        self.assertEqual(pua[:28], translate(jamo[:33]))  # 르
        self.assertEqual(pua[:29], translate(jamo[:34]))  # u0020

        self.assertEqual(pua[:29] + u'\uf7c2', translate(jamo[:35]))
        self.assertEqual(pua[:30], translate(jamo[:36]))
        self.assertEqual(pua[:30] + u'\uf7a8', translate(jamo[:37]))
        self.assertEqual(pua[:30] + u'\ue560', translate(jamo[:38]))
        self.assertEqual(pua[:31], translate(jamo[:39]))
        self.assertEqual(pua[:32], translate(jamo[:40]))  # 디
        self.assertEqual(pua[:33], translate(jamo[:41]))  # u302e
        self.assertEqual(pua[:34], translate(jamo[:42]))  # u0020

        self.assertEqual(pua[:35], translate(jamo[:43]))  # 아
        self.assertEqual(pua[:36], translate(jamo[:44]))  # 니
        self.assertEqual(pua[:37], translate(jamo[:45]))  # u302e
        self.assertEqual(pua[:37] + u'\uf7fc', translate(jamo[:46]))
        self.assertEqual(pua[:37] + u'\uf537', translate(jamo[:47]))
        self.assertEqual(pua[:38], translate(jamo[:48]))
        self.assertEqual(pua[:38] + u'\uf7ca', translate(jamo[:49]))
        self.assertEqual(pua[:39], translate(jamo[:50]))
        self.assertEqual(pua[:40], translate(jamo[:51]))  # 302e

        self.assertEqual(pua[:40], translate(jamo))
Exemplo n.º 7
0
    def test_jc2p_decode(self):
        # Exercise the cffi-exported jc2p decode/calcsize functions directly:
        # ever-longer prefixes of self.jamo_string are decoded and compared
        # with prefixes of self.pua_string (plus, for some prefixes, the
        # extra characters listed explicitly below).
        from cffi import FFI
        from hypua2jamo._cffi import lib

        # Pick the UCS-4 or UCS-2 entry points to match the item size of
        # array('u') on this interpreter.
        unicode_size = array('u').itemsize
        if unicode_size == 4:
            _translate = lib.hypua_jc2p_ucs4_decode
            _calcsize = lib.hypua_jc2p_ucs4_calcsize
        elif unicode_size == 2:
            _translate = lib.hypua_jc2p_ucs2_decode
            _calcsize = lib.hypua_jc2p_ucs2_calcsize
        else:
            raise AssertionError(unicode_size)

        ffi = FFI()

        def translate(jamo_string):
            # Expose the input as a raw buffer for the C function.
            jamo_array = array('u', jamo_string)
            jamo_ptr, jamo_len = jamo_array.buffer_info()
            jamo_ptr = ffi.cast('void *', jamo_ptr)

            # The size reported by calcsize is used to allocate the output.
            pua_size = _calcsize(jamo_ptr, jamo_len)

            pua_array = array('u', u' ' * pua_size)
            pua_ptr = pua_array.buffer_info()[0]
            pua_ptr = ffi.cast('void *', pua_ptr)
            pua_len = _translate(jamo_ptr, jamo_len, pua_ptr)
            if pua_size != pua_len:
                # Format the message here; passing the values as extra
                # Exception arguments would leave the '%r' placeholders
                # unfilled.
                raise Exception('%r != %r' % (pua_size, pua_len))
            return pua_array.tounicode()

        pua = self.pua_string
        jamo = self.jamo_string

        self.assertEqual(pua[:1], translate(jamo[:1]))    # 나
        self.assertEqual(pua[:2], translate(jamo[:2]))    # 랏
        self.assertEqual(pua[:3], translate(jamo[:3]))
        self.assertEqual(pua[:4], translate(jamo[:4]))
        self.assertEqual(pua[:5], translate(jamo[:5]))
        self.assertEqual(pua[:5] + u'\uf7ca', translate(jamo[:6]))
        self.assertEqual(pua[:6], translate(jamo[:7]))
        self.assertEqual(pua[:7], translate(jamo[:8]))
        self.assertEqual(pua[:8], translate(jamo[:9]))
        self.assertEqual(pua[:9], translate(jamo[:10]))

        self.assertEqual(pua[:10], translate(jamo[:11]))  # 中
        self.assertEqual(pua[:10] + u'\uf790', translate(jamo[:12]))
        self.assertEqual(pua[:10] + u'\u1103\u1172', translate(jamo[:13]))
        self.assertEqual(pua[:11], translate(jamo[:14]))
        self.assertEqual(pua[:12], translate(jamo[:15]))  # 國
        self.assertEqual(pua[:13], translate(jamo[:16]))  # 귁
        self.assertEqual(pua[:14], translate(jamo[:17]))  # 에
        self.assertEqual(pua[:15], translate(jamo[:18]))  # u302e
        self.assertEqual(pua[:16], translate(jamo[:19]))  # u0020

        self.assertEqual(pua[:17], translate(jamo[:20]))  # 달
        self.assertEqual(pua[:18], translate(jamo[:21]))  # 아
        self.assertEqual(pua[:19], translate(jamo[:22]))  # u302e
        self.assertEqual(pua[:20], translate(jamo[:23]))  # u0020

        self.assertEqual(pua[:21], translate(jamo[:24]))  # 문
        self.assertEqual(pua[:21] + u'\uf7ea', translate(jamo[:25]))
        self.assertEqual(pua[:21] + u'\uf250', translate(jamo[:26]))
        self.assertEqual(pua[:22], translate(jamo[:27]))
        self.assertEqual(pua[:23], translate(jamo[:28]))  # 와
        self.assertEqual(pua[:24], translate(jamo[:29]))  # u302e
        self.assertEqual(pua[:25], translate(jamo[:30]))  # 로
        self.assertEqual(pua[:26], translate(jamo[:31]))  # u0020

        self.assertEqual(pua[:27], translate(jamo[:32]))  # 서
        self.assertEqual(pua[:28], translate(jamo[:33]))  # 르
        self.assertEqual(pua[:29], translate(jamo[:34]))  # u0020

        self.assertEqual(pua[:29] + u'\uf7c2', translate(jamo[:35]))
        self.assertEqual(pua[:30], translate(jamo[:36]))
        self.assertEqual(pua[:30] + u'\uf7a8', translate(jamo[:37]))
        self.assertEqual(pua[:30] + u'\ue560', translate(jamo[:38]))
        self.assertEqual(pua[:31], translate(jamo[:39]))
        self.assertEqual(pua[:32], translate(jamo[:40]))  # 디
        self.assertEqual(pua[:33], translate(jamo[:41]))  # u302e
        self.assertEqual(pua[:34], translate(jamo[:42]))  # u0020

        self.assertEqual(pua[:35], translate(jamo[:43]))  # 아
        self.assertEqual(pua[:36], translate(jamo[:44]))  # 니
        self.assertEqual(pua[:37], translate(jamo[:45]))  # u302e
        self.assertEqual(pua[:37] + u'\uf7fc', translate(jamo[:46]))
        self.assertEqual(pua[:37] + u'\uf537', translate(jamo[:47]))
        self.assertEqual(pua[:38], translate(jamo[:48]))
        self.assertEqual(pua[:38] + u'\uf7ca', translate(jamo[:49]))
        self.assertEqual(pua[:39], translate(jamo[:50]))
        self.assertEqual(pua[:40], translate(jamo[:51]))  # 302e

        # The complete input must give the first 40 characters of pua_string.
        self.assertEqual(pua[:40], translate(jamo))