Пример #1
0
def test_custom_style_with_call():
    """A style registered with a plain callable is applied to every pinyin.

    The expected value is built by running the same callable over the
    output of ``pinyin(hans)`` called without a style, so both sides must
    agree item by item.
    """
    style_value = 'test_custom_style_with_call'

    def func(pinyin, **kwargs):
        # Converter under test: the length of the input, as a string.
        return str(len(pinyin))

    register(style_value, func=func)

    hans = '北京'
    expected_pinyin_s = [
        [func(py) for py in pinyin_s]
        for pinyin_s in pinyin(hans)
    ]

    assert pinyin(hans, style=style_value) == expected_pinyin_s
Пример #2
0
def test_custom_style_with_call():
    """A style registered with a plain callable is applied to every pinyin.

    The expected value is built by running the same callable over the
    output of ``pinyin(hans)`` called without a style, so both sides must
    agree item by item.
    """
    style_value = 'test_custom_style_with_call'

    def func(pinyin, **kwargs):
        # Converter under test: the length of the input, as a string.
        return str(len(pinyin))

    register(style_value, func=func)

    hans = '北京'
    expected_pinyin_s = [
        [func(py) for py in pinyin_s]
        for pinyin_s in pinyin(hans)
    ]

    assert pinyin(hans, style=style_value) == expected_pinyin_s
Пример #3
0
)
# Character-for-character lookup: the n-th character of the first string maps
# to the n-th character of the second. ``to_cyrillic`` applies it after the
# CYRILLIC_REPLACE substitutions.
CYRILLIC_TABLE = {
    code: letter
    for code, letter in zip(
        u'abwgdEOrZiIklmnopRstufhqcCSHTMUevAV',
        u'абвгдеёжзийклмнопрстуфхццчшщъьыэюяю',
    )
}


class CyrillicfoConverter(object):
    """Converters that rewrite a pinyin string with Cyrillic letters."""

    def to_cyrillic(self, pinyin, **kwargs):
        """Return ``pinyin`` rewritten through the Cyrillic lookup table.

        The input is normalised by ``_pre_convert``, every
        ``(pattern, replacement)`` pair of ``CYRILLIC_REPLACE`` is applied
        in order, and finally each character is looked up in
        ``CYRILLIC_TABLE``; characters without an entry are kept as they
        are. Extra keyword arguments are accepted and ignored.
        """
        text = self._pre_convert(pinyin)
        # Table-driven substitutions, applied in the order they are listed.
        for pattern, replacement in CYRILLIC_REPLACE:
            text = pattern.sub(replacement, text)
        return ''.join([CYRILLIC_TABLE.get(ch, ch) for ch in text])

    def to_cyrillic_first(self, pinyin, **kwargs):
        """Return the first character of the ``to_cyrillic`` result."""
        return self.to_cyrillic(pinyin, **kwargs)[0]

    def _pre_convert(self, pinyin):
        """Express tones as digits, then move the tone digit to the end."""
        numbered = replace_symbol_to_number(pinyin)
        return RE_TONE3.sub(r'\1\3\2', numbered)


# Module-level converter instance; its bound methods are registered as the
# handlers for the two Cyrillic styles.
converter = CyrillicfoConverter()
register(Style.CYRILLIC, func=converter.to_cyrillic)
register(Style.CYRILLIC_FIRST, func=converter.to_cyrillic_first)
Пример #4
0
from pypinyin.constants import Style
from pypinyin.style import register
from pypinyin.style._tone_convert import (to_finals, to_finals_tone,
                                          to_finals_tone2, to_finals_tone3)


class FinalsConverter(object):
    """Adapters exposing the ``to_finals*`` helpers as style converters.

    Every method forwards ``pinyin`` to the module-level helper of the same
    name and passes ``strict`` taken from the keyword arguments, defaulting
    to ``True`` when it is absent.
    """

    def to_finals(self, pinyin, **kwargs):
        """Finals without tone."""
        strict = kwargs.get('strict', True)
        return to_finals(pinyin, strict=strict)

    def to_finals_tone(self, pinyin, **kwargs):
        """Finals with the tone mark placed on the final."""
        strict = kwargs.get('strict', True)
        return to_finals_tone(pinyin, strict=strict)

    def to_finals_tone2(self, pinyin, **kwargs):
        """Finals with numeric tones (``to_finals_tone2`` form)."""
        strict = kwargs.get('strict', True)
        return to_finals_tone2(pinyin, strict=strict)

    def to_finals_tone3(self, pinyin, **kwargs):
        """Finals with numeric tones (``to_finals_tone3`` form)."""
        strict = kwargs.get('strict', True)
        return to_finals_tone3(pinyin, strict=strict)


# Module-level converter instance; each finals style is bound to the method
# of the matching name.
converter = FinalsConverter()
register(Style.FINALS, func=converter.to_finals)
register(Style.FINALS_TONE, func=converter.to_finals_tone)
register(Style.FINALS_TONE2, func=converter.to_finals_tone2)
register(Style.FINALS_TONE3, func=converter.to_finals_tone3)
Пример #5
0
)
# Character-for-character lookup: the n-th character of the first string maps
# to the n-th symbol of the second. The trailing digits 2, 3, 4 and 0 map to
# the four tone marks at the end of the second string.
BOPOMOFO_TABLE = {
    code: symbol
    for code, symbol in zip(
        'bpmfdtnlgkhjqxZCSrzcsiuvaoeEAIOUMNKGR2340',
        'ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄧㄨㄩㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦˊˇˋ˙',
    )
}


class BopomofoConverter(object):
    """Converters that rewrite a pinyin string with bopomofo symbols."""

    def to_bopomofo(self, pinyin, **kwargs):
        """Return ``pinyin`` rewritten through the bopomofo lookup table.

        The input is normalised by ``_pre_convert``, every
        ``(pattern, replacement)`` pair of ``BOPOMOFO_REPLACE`` is applied
        in order, and finally each character is looked up in
        ``BOPOMOFO_TABLE``; characters without an entry are kept as they
        are. Extra keyword arguments are accepted and ignored.
        """
        text = self._pre_convert(pinyin)
        # Table-driven substitutions, applied in the order they are listed.
        for pattern, replacement in BOPOMOFO_REPLACE:
            text = pattern.sub(replacement, text)
        return ''.join([BOPOMOFO_TABLE.get(ch, ch) for ch in text])

    def to_bopomofo_first(self, pinyin, **kwargs):
        """Return the first character of the ``to_bopomofo`` result."""
        return self.to_bopomofo(pinyin, **kwargs)[0]

    def _pre_convert(self, pinyin):
        """Express tones as digits, then move the tone digit to the end."""
        numbered = replace_symbol_to_number(pinyin)
        return RE_TONE3.sub(r'\1\3\2', numbered)


# Module-level converter instance; its bound methods are registered as the
# handlers for the two bopomofo styles.
converter = BopomofoConverter()
register(Style.BOPOMOFO, func=converter.to_bopomofo)
register(Style.BOPOMOFO_FIRST, func=converter.to_bopomofo_first)
Пример #6
0
# -*- coding: utf-8 -*-
"""其他几个拼音风格实现:

Style.NORMAL
Style.FIRST_LETTER
"""
from __future__ import unicode_literals

from pypinyin.constants import Style
from pypinyin.style import register
from pypinyin.style._utils import replace_symbol_to_no_symbol


class OthersConverter(object):
    """Converters for ``Style.NORMAL`` and ``Style.FIRST_LETTER``."""

    def to_normal(self, pinyin, **kwargs):
        """Return ``pinyin`` passed through ``replace_symbol_to_no_symbol``."""
        return replace_symbol_to_no_symbol(pinyin)

    def to_first_letter(self, pinyin, **kwargs):
        """Return the first character of the ``to_normal`` result."""
        return self.to_normal(pinyin)[0]


# Module-level converter instance; its bound methods are registered as the
# handlers for Style.NORMAL and Style.FIRST_LETTER.
converter = OthersConverter()
register(Style.NORMAL, func=converter.to_normal)
register(Style.FIRST_LETTER, func=converter.to_first_letter)
Пример #7
0
Style.TONE2
Style.TONE3
"""
from __future__ import unicode_literals

from pypinyin.constants import Style
from pypinyin.style import register
from pypinyin.style._constants import RE_TONE3
from pypinyin.style._utils import replace_symbol_to_number


class ToneConverter(object):
    """Converters for ``Style.TONE``, ``Style.TONE2`` and ``Style.TONE3``."""

    def to_tone(self, pinyin, **kwargs):
        """Return ``pinyin`` unchanged."""
        return pinyin

    def to_tone2(self, pinyin, **kwargs):
        """Return ``pinyin`` with its tones expressed as digits."""
        return replace_symbol_to_number(pinyin)

    def to_tone3(self, pinyin, **kwargs):
        """Like ``to_tone2``, but with the tone digit moved to the end."""
        numbered = self.to_tone2(pinyin, **kwargs)
        return RE_TONE3.sub(r'\1\3\2', numbered)


# Module-level converter instance; each tone style is bound to the method of
# the matching name.
converter = ToneConverter()
register(Style.TONE, func=converter.to_tone)
register(Style.TONE2, func=converter.to_tone2)
register(Style.TONE3, func=converter.to_tone3)
Пример #8
0
    whole_converted = _convert_whole(pinyin, _except_table)
    if whole_converted != pinyin:
        return _fixed_result(whole_converted)
    whole_converted = _convert_whole(pinyin, _convert_table)
    if whole_converted != pinyin:
        return _fixed_result(whole_converted)

    initials = get_initials(pinyin, strict=False)
    tones = pinyin[len(initials):]

    initials = _convert_whole(initials, _initial_table)
    tones = _convert_whole(tones, _tone_table)

    return _fixed_result('{}{}'.format(initials, tones))


def _fixed_result(pinyin):
    return pinyin.replace('ü', 'v')


def _convert_whole(chars, table):
    for pair in table:
        f, r = pair
        if f == chars:
            return r

    return chars


# Register ``to_wade_glides`` as the converter for ``Style.WADEGILES``.
register(Style.WADEGILES, func=to_wade_glides)
Пример #9
0
    (re.compile('^v(\d?$)'), 'vI'),
)
# Character-for-character lookup: the n-th character of the first string maps
# to the n-th character of the second. ``to_cyrillic`` applies it after the
# CYRILLIC_REPLACE substitutions.
CYRILLIC_TABLE = {
    code: letter
    for code, letter in zip(
        u'abwgdEOrZiIklmnopRstufhqcCSHTMUevAV',
        u'абвгдеёжзийклмнопрстуфхццчшщъьыэюяю',
    )
}


class CyrillicfoConverter(object):
    """Converters that rewrite a pinyin string with Cyrillic letters."""

    def to_cyrillic(self, pinyin, **kwargs):
        """Return ``pinyin`` rewritten through the Cyrillic lookup table.

        The input is normalised by ``_pre_convert``, every
        ``(pattern, replacement)`` pair of ``CYRILLIC_REPLACE`` is applied
        in order, and finally each character is looked up in
        ``CYRILLIC_TABLE``; characters without an entry are kept as they
        are. Extra keyword arguments are accepted and ignored.
        """
        text = self._pre_convert(pinyin)
        # Table-driven substitutions, applied in the order they are listed.
        for pattern, replacement in CYRILLIC_REPLACE:
            text = pattern.sub(replacement, text)
        return ''.join([CYRILLIC_TABLE.get(ch, ch) for ch in text])

    def to_cyrillic_first(self, pinyin, **kwargs):
        """Return the first character of the ``to_cyrillic`` result."""
        return self.to_cyrillic(pinyin, **kwargs)[0]

    def _pre_convert(self, pinyin):
        """Express tones as digits, then move the tone digit to the end."""
        numbered = replace_symbol_to_number(pinyin)
        return RE_TONE3.sub(r'\1\3\2', numbered)


# Module-level converter instance; its bound methods are registered as the
# handlers for the two Cyrillic styles.
converter = CyrillicfoConverter()
register(Style.CYRILLIC, func=converter.to_cyrillic)
register(Style.CYRILLIC_FIRST, func=converter.to_cyrillic_first)
Пример #10
0
    (re.compile('1$'), ''),
)
# Character-for-character lookup: the n-th character of the first string maps
# to the n-th symbol of the second. The trailing digits 2, 3, 4 and 0 map to
# the four tone marks at the end of the second string.
BOPOMOFO_TABLE = {
    code: symbol
    for code, symbol in zip(
        'bpmfdtnlgkhjqxZCSrzcsiuvaoeEAIOUMNKGR2340',
        'ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄧㄨㄩㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦˊˇˋ˙',
    )
}


class BopomofoConverter(object):
    """Converters that rewrite a pinyin string with bopomofo symbols."""

    def to_bopomofo(self, pinyin, **kwargs):
        """Return ``pinyin`` rewritten through the bopomofo lookup table.

        The input is normalised by ``_pre_convert``, every
        ``(pattern, replacement)`` pair of ``BOPOMOFO_REPLACE`` is applied
        in order, and finally each character is looked up in
        ``BOPOMOFO_TABLE``; characters without an entry are kept as they
        are. Extra keyword arguments are accepted and ignored.
        """
        text = self._pre_convert(pinyin)
        # Table-driven substitutions, applied in the order they are listed.
        for pattern, replacement in BOPOMOFO_REPLACE:
            text = pattern.sub(replacement, text)
        return ''.join([BOPOMOFO_TABLE.get(ch, ch) for ch in text])

    def to_bopomofo_first(self, pinyin, **kwargs):
        """Return the first character of the ``to_bopomofo`` result."""
        return self.to_bopomofo(pinyin, **kwargs)[0]

    def _pre_convert(self, pinyin):
        """Express tones as digits, then move the tone digit to the end."""
        numbered = replace_symbol_to_number(pinyin)
        return RE_TONE3.sub(r'\1\3\2', numbered)


# Module-level converter instance; its bound methods are registered as the
# handlers for the two bopomofo styles.
converter = BopomofoConverter()
register(Style.BOPOMOFO, func=converter.to_bopomofo)
register(Style.BOPOMOFO_FIRST, func=converter.to_bopomofo_first)
Пример #11
0
import pypinyin
from pypinyin.style import register
from .textnorm import textnorm
from .nonstd_pinyin import _nonstd_style
import jieba

# Run jieba's initialisation once, at import time.
# NOTE(review): presumably this pre-loads the segmentation dictionary so the
# first real call is not delayed - confirm against the jieba documentation.
jieba.initialize()
# Make ``_nonstd_style`` available under the style name 'nonstd', the name
# ``_get_pinyin`` selects when ``std`` is false.
register('nonstd', _nonstd_style)


def _get_pinyin(text, std=True, pb=False):
    '''
    Params:
        text: string, normalized sentences
        std: boolean, standard pinyin stylc, default: standard pinyin style
        pb: boolen, prosody label, default: False
    
    Returns:
        pinyin: string
    '''
    if std:
        style = pypinyin.Style.TONE3
    else:
        style = 'nonstd'

    text = text.strip()

    punctuation = ', '
    if text[-1:] in list(',.?!'):
        punctuation = text[-1:] + ' '
        text = text[:-1]
Пример #12
0
        has_fi = has_finals(pinyin)

        # 用数字表示声调
        pinyin = replace_symbol_to_number(pinyin)
        if not has_fi:
            return pinyin
        # 获取韵母部分
        return get_finals(pinyin, strict=False)

    def to_finals_tone3(self, pinyin, **kwargs):
        """Return the finals of ``pinyin`` with the tone digit at the end.

        When the ``strict`` keyword argument is truthy the input first goes
        through ``convert_finals``. Tones are then expressed as digits and
        the digit is moved to the end. If ``has_finals`` reported no finals
        for the (possibly converted) input, that numbered string is returned
        as is; otherwise only its finals part is returned.
        """
        strict = kwargs.get('strict')
        if strict:
            pinyin = convert_finals(pinyin)
        # Decided before the tone marks are rewritten as digits.
        contains_finals = has_finals(pinyin)

        # Tones as digits, then the digit moved to the end.
        numbered = RE_TONE3.sub(r'\1\3\2', replace_symbol_to_number(pinyin))

        if contains_finals:
            # Keep only the finals part.
            return get_finals(numbered, strict=False)
        return numbered


# Module-level converter instance; each finals style is bound to the method
# of the matching name.
converter = FinalsConverter()
register(Style.FINALS, func=converter.to_finals)
register(Style.FINALS_TONE, func=converter.to_finals_tone)
register(Style.FINALS_TONE2, func=converter.to_finals_tone2)
register(Style.FINALS_TONE3, func=converter.to_finals_tone3)