示例#1
0
文件: hepburn.py 项目: hunza/htext
def reverse(value):
    """Turn a romaji string back into hiragana.

    The input is lower-cased, rewritten by a fixed sequence of regex
    substitutions, and the result is passed through ``kana.to_hiragana``.
    """
    def sokuon(match):
        # step 1; tta -> ッta
        return "ッ%s" % match.group(1)

    def lookup(match):
        # step 2: swap the captured romaji for its ROMAJI_TO_KANA_MAP entry
        return ROMAJI_TO_KANA_MAP[match.group(1)]

    def moraic_n(match):
        # step 3: emit the captured group followed by ン
        return '%sン' % match.group(1)

    text = value.lower()
    text = RE_CONSONANTS_REVERSE.sub(sokuon, text)

    # Applied in this order: THREE, then TWO, then ONE
    # (presumably longest romaji sequences first -- confirm against the
    # pattern definitions).
    for pattern in (RE_ROMAJI_THREE, RE_ROMAJI_TWO, RE_ROMAJI_ONE):
        text = pattern.sub(lookup, text)

    text = RE_N.sub(moraic_n, text)
    return kana.to_hiragana(text)
示例#2
0
def reverse(value):
    """Convert romaji text to hiragana.

    Lower-cases ``value``, runs it through an ordered series of regex
    substitutions, and returns ``kana.to_hiragana`` of the result.
    """
    def double_consonant(m):
        # step 1; tta -> ッta
        return "ッ%s" % m.group(1)

    def to_kana(m):
        # step 2: look the captured romaji up in ROMAJI_TO_KANA_MAP
        return ROMAJI_TO_KANA_MAP[m.group(1)]

    def append_n(m):
        # step 3: captured group followed by ン
        return '%sン' % m.group(1)

    # The order of the steps matters: each substitution sees the output
    # of the previous one.
    steps = (
        (RE_CONSONANTS_REVERSE, double_consonant),
        (RE_ROMAJI_THREE, to_kana),
        (RE_ROMAJI_TWO, to_kana),
        (RE_ROMAJI_ONE, to_kana),
        (RE_N, append_n),
    )

    result = value.lower()
    for regex, replacer in steps:
        result = regex.sub(replacer, result)

    return kana.to_hiragana(result)
示例#3
0
def test_mixed():
    """Check kana conversion on text that mixes kana with kanji."""
    # Katakana -> hiragana: the expected value keeps ー and the kanji as-is.
    assert kana.to_hiragana("カラマーゾフの兄弟") == "からまーぞふの兄弟"

    # Hiragana -> katakana: の is expected to become ノ, kanji stay as-is.
    assert kana.to_katakana("からまーぞふの兄弟") == "カラマーゾフノ兄弟"
示例#4
0
 def func(input, expected):
     """Assert that ``kana.to_hiragana(input)`` equals ``expected``."""
     converted = kana.to_hiragana(input)
     # The failure message reports the expected value first, then the result.
     assert converted == expected, "%s expected, got %s" % (expected, converted)
示例#5
0
    def func(input, expected):
        """Assert both kana conversions of ``input`` give ``expected``.

        Each result must be a ``six.text_type`` instance and compare equal
        to ``expected``.
        """
        as_hiragana = kana.to_hiragana(input)
        assert isinstance(as_hiragana, six.text_type)
        assert as_hiragana == expected

        as_katakana = kana.to_katakana(input)
        assert isinstance(as_katakana, six.text_type)
        assert as_katakana == expected
示例#6
0
def romanize(value, long_vowels_h=False):
    """Romanize kana text following the passport-style Hepburn rules.

    The text is first normalised with ``kana.to_hiragana`` and then walked
    left to right using ``_hepburn_for`` to look up each unit.

    Args:
        value: Text to romanize.
        long_vowels_h: When true, the second half of a long "O" sound
            (``OO`` / ``OU``) is written as ``H``; otherwise it is dropped.

    Returns:
        The concatenated romanization. Characters for which no Hepburn
        replacement is found are silently omitted.
    """
    # convert katakana to hiragana
    value = kana.to_hiragana(value)

    output = []
    last_hepburn = None

    value_length = len(value)
    index = 0

    while index < value_length:
        char, hepburn = _hepburn_for(value, index)

        if char == u"ん":
            # 1. Moraic n: Hepburn writes M instead of N before B/M/P.
            _, next_hepburn = _hepburn_for(value, index + 1)
            if (next_hepburn is not None
                    and next_hepburn.startswith(('B', 'M', 'P'))):
                hepburn = 'M'
            else:
                hepburn = 'N'

        elif char == u"っ":
            # 2. Sokuon: shown by doubling the following consonant.
            _, next_hepburn = _hepburn_for(value, index + 1)

            if not next_hepburn:
                # Nothing romanizable follows (end of input or an unmapped
                # character), so there is no consonant to double. Treat the
                # sokuon like any other character without a replacement
                # instead of crashing on next_hepburn[0].
                hepburn = None
            elif next_hepburn.startswith('CH'):
                # Only before CHI / CHA / CHU / CHO, a T is added instead.
                hepburn = 'T'
            else:
                hepburn = next_hepburn[0]

        elif char == u"ー":
            # 3. Long vowel mark: Hepburn does not write long vowels.
            hepburn = ""

        # NOTE: per the original author, the Japanese passport table has no
        # entries for ぁ-ぉ; they get no special handling here.

        if hepburn is not None:
            if last_hepburn is not None:
                # Look at the last two letters across the boundary between
                # the previous unit and this one.
                h_test = (last_hepburn + hepburn)[-2:]

                if h_test in ('AA', 'II', 'UU', 'EE'):
                    # 3. Long vowel: Hepburn does not write long vowels.
                    hepburn = ''
                elif h_test in ('OO', 'OU'):
                    hepburn = 'H' if long_vowels_h else ''

            output.append(hepburn)
        # else: no Hepburn replacement for this character; it is dropped.

        last_hepburn = hepburn
        index += len(char)

    return ''.join(output)
示例#7
0
文件: passport.py 项目: hunza/htext
def romanize(value, long_vowels_h=False):
    """Romanize kana text following the passport-style Hepburn rules.

    The text is first normalised with ``kana.to_hiragana`` and then walked
    left to right using ``_hepburn_for`` to look up each unit.

    Args:
        value: Text to romanize.
        long_vowels_h: When true, the second half of a long "O" sound
            (``OO`` / ``OU``) is written as ``H``; otherwise it is dropped.

    Returns:
        The concatenated romanization. Characters for which no Hepburn
        replacement is found are silently omitted.
    """
    # convert katakana to hiragana
    value = kana.to_hiragana(value)

    output = []
    last_hepburn = None

    value_length = len(value)
    index = 0

    while index < value_length:
        char, hepburn = _hepburn_for(value, index)

        if char == u"ん":
            # 1. Moraic n: Hepburn writes M instead of N before B/M/P.
            _, next_hepburn = _hepburn_for(value, index + 1)
            if (next_hepburn is not None
                    and next_hepburn.startswith(('B', 'M', 'P'))):
                hepburn = 'M'
            else:
                hepburn = 'N'

        elif char == u"っ":
            # 2. Sokuon: shown by doubling the following consonant.
            _, next_hepburn = _hepburn_for(value, index + 1)

            if not next_hepburn:
                # Nothing romanizable follows (end of input or an unmapped
                # character), so there is no consonant to double. Treat the
                # sokuon like any other character without a replacement
                # instead of crashing on next_hepburn[0].
                hepburn = None
            elif next_hepburn.startswith('CH'):
                # Only before CHI / CHA / CHU / CHO, a T is added instead.
                hepburn = 'T'
            else:
                hepburn = next_hepburn[0]

        elif char == u"ー":
            # 3. Long vowel mark: Hepburn does not write long vowels.
            hepburn = ""

        # NOTE: per the original author, the Japanese passport table has no
        # entries for ぁ-ぉ; they get no special handling here.

        if hepburn is not None:
            if last_hepburn is not None:
                # Look at the last two letters across the boundary between
                # the previous unit and this one.
                h_test = (last_hepburn + hepburn)[-2:]

                if h_test in ('AA', 'II', 'UU', 'EE'):
                    # 3. Long vowel: Hepburn does not write long vowels.
                    hepburn = ''
                elif h_test in ('OO', 'OU'):
                    hepburn = 'H' if long_vowels_h else ''

            output.append(hepburn)
        # else: no Hepburn replacement for this character; it is dropped.

        last_hepburn = hepburn
        index += len(char)

    return ''.join(output)
示例#8
0
 def func(input, expected):
     """Assert that ``kana.to_hiragana(input)`` equals ``expected``."""
     result = kana.to_hiragana(input)
     # The failure message reports the expected value first, then the result.
     assert result == expected, "%s expected, got %s" % (expected, result)
示例#9
0
    def func(input, expected):
        """Assert both kana conversions of ``input`` give ``expected``.

        Each result must be a ``six.text_type`` instance and compare equal
        to ``expected``.
        """
        hira = kana.to_hiragana(input)
        assert isinstance(hira, six.text_type)
        assert hira == expected

        kata = kana.to_katakana(input)
        assert isinstance(kata, six.text_type)
        assert kata == expected
示例#10
0
def test_mixed():
    """Check kana conversion on text that mixes kana with kanji."""
    # Katakana -> hiragana: the expected value keeps ー and the kanji as-is.
    assert kana.to_hiragana("カラマーゾフの兄弟") == "からまーぞふの兄弟"

    # Hiragana -> katakana: の is expected to become ノ, kanji stay as-is.
    assert kana.to_katakana("からまーぞふの兄弟") == "カラマーゾフノ兄弟"