def test_custom_style_with_call():
    """A style registered with ``func=`` yields ``func`` applied to each entry.

    The expected value is the default ``pinyin(hans)`` output with every
    inner entry replaced by ``func(entry)``.
    """
    style_value = 'test_custom_style_with_call'

    def func(pinyin, **kwargs):
        return str(len(pinyin))

    register(style_value, func=func)

    hans = '北京'
    # Build the expectation from the default conversion, entry by entry.
    expected_pinyin_s = [
        [func(py) for py in pinyin_s]
        for pinyin_s in pinyin(hans)
    ]
    assert pinyin(hans, style=style_value) == expected_pinyin_s
) CYRILLIC_TABLE = dict(zip( u'abwgdEOrZiIklmnopRstufhqcCSHTMUevAV', u'абвгдеёжзийклмнопрстуфхццчшщъьыэюяю' )) class CyrillicfoConverter(object): def to_cyrillic(self, pinyin, **kwargs): pinyin = self._pre_convert(pinyin) # 查表替换成注音 for find_re, replace in CYRILLIC_REPLACE: pinyin = find_re.sub(replace, pinyin) pinyin = ''.join(CYRILLIC_TABLE.get(x, x) for x in pinyin) return pinyin def to_cyrillic_first(self, pinyin, **kwargs): pinyin = self.to_cyrillic(pinyin, **kwargs) return pinyin[0] def _pre_convert(self, pinyin): # 用数字表示声调 pinyin = replace_symbol_to_number(pinyin) # 将声调数字移动到最后 return RE_TONE3.sub(r'\1\3\2', pinyin) converter = CyrillicfoConverter() register(Style.CYRILLIC, func=converter.to_cyrillic) register(Style.CYRILLIC_FIRST, func=converter.to_cyrillic_first)
from pypinyin.constants import Style
from pypinyin.style import register
from pypinyin.style._tone_convert import (
    to_finals, to_finals_tone, to_finals_tone2, to_finals_tone3
)


class FinalsConverter(object):
    """Style callbacks that delegate to the ``_tone_convert`` finals helpers.

    Every method forwards ``pinyin`` together with the ``strict`` keyword
    argument, which defaults to ``True`` when the caller does not pass it.
    """

    def to_finals(self, pinyin, **kwargs):
        """Finals without tone."""
        strict = kwargs.get('strict', True)
        return to_finals(pinyin, strict=strict)

    def to_finals_tone(self, pinyin, **kwargs):
        """Finals with the tone mark on top of the final."""
        strict = kwargs.get('strict', True)
        return to_finals_tone(pinyin, strict=strict)

    def to_finals_tone2(self, pinyin, **kwargs):
        """Finals with numeric tones, via ``to_finals_tone2``."""
        strict = kwargs.get('strict', True)
        return to_finals_tone2(pinyin, strict=strict)

    def to_finals_tone3(self, pinyin, **kwargs):
        """Finals with numeric tones, via ``to_finals_tone3``."""
        strict = kwargs.get('strict', True)
        return to_finals_tone3(pinyin, strict=strict)


converter = FinalsConverter()
register(Style.FINALS, func=converter.to_finals)
register(Style.FINALS_TONE, func=converter.to_finals_tone)
register(Style.FINALS_TONE2, func=converter.to_finals_tone2)
register(Style.FINALS_TONE3, func=converter.to_finals_tone3)
) BOPOMOFO_TABLE = dict(zip( 'bpmfdtnlgkhjqxZCSrzcsiuvaoeEAIOUMNKGR2340', 'ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄧㄨㄩㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦˊˇˋ˙' )) class BopomofoConverter(object): def to_bopomofo(self, pinyin, **kwargs): pinyin = self._pre_convert(pinyin) # 查表替换成注音 for find_re, replace in BOPOMOFO_REPLACE: pinyin = find_re.sub(replace, pinyin) pinyin = ''.join(BOPOMOFO_TABLE.get(x, x) for x in pinyin) return pinyin def to_bopomofo_first(self, pinyin, **kwargs): pinyin = self.to_bopomofo(pinyin, **kwargs) return pinyin[0] def _pre_convert(self, pinyin): # 用数字表示声调 pinyin = replace_symbol_to_number(pinyin) # 将声调数字移动到最后 return RE_TONE3.sub(r'\1\3\2', pinyin) converter = BopomofoConverter() register(Style.BOPOMOFO, func=converter.to_bopomofo) register(Style.BOPOMOFO_FIRST, func=converter.to_bopomofo_first)
# -*- coding: utf-8 -*-
"""Implementations of a few other pinyin styles:

Style.NORMAL
Style.FIRST_LETTER
"""
from __future__ import unicode_literals

from pypinyin.constants import Style
from pypinyin.style import register
from pypinyin.style._utils import replace_symbol_to_no_symbol


class OthersConverter(object):
    """Style callbacks for ``Style.NORMAL`` and ``Style.FIRST_LETTER``."""

    def to_normal(self, pinyin, **kwargs):
        """Return ``pinyin`` passed through ``replace_symbol_to_no_symbol``.

        :param pinyin: a single pinyin string.
        :param kwargs: accepted for the style-callback signature; unused here.
        """
        return replace_symbol_to_no_symbol(pinyin)

    def to_first_letter(self, pinyin, **kwargs):
        """Return the first character of the ``to_normal`` form.

        Returns ``''`` when the normalised string is empty.
        """
        normal = self.to_normal(pinyin)
        # Slice instead of indexing so an empty string does not raise
        # IndexError.
        return normal[:1]


converter = OthersConverter()
register(Style.NORMAL, func=converter.to_normal)
register(Style.FIRST_LETTER, func=converter.to_first_letter)
Style.TONE2 Style.TONE3 """ from __future__ import unicode_literals from pypinyin.constants import Style from pypinyin.style import register from pypinyin.style._constants import RE_TONE3 from pypinyin.style._utils import replace_symbol_to_number class ToneConverter(object): def to_tone(self, pinyin, **kwargs): return pinyin def to_tone2(self, pinyin, **kwargs): # 用数字表示声调 pinyin = replace_symbol_to_number(pinyin) return pinyin def to_tone3(self, pinyin, **kwargs): pinyin = self.to_tone2(pinyin, **kwargs) # 将声调数字移动到最后 return RE_TONE3.sub(r'\1\3\2', pinyin) converter = ToneConverter() register(Style.TONE, func=converter.to_tone) register(Style.TONE2, func=converter.to_tone2) register(Style.TONE3, func=converter.to_tone3)
whole_converted = _convert_whole(pinyin, _except_table) if whole_converted != pinyin: return _fixed_result(whole_converted) whole_converted = _convert_whole(pinyin, _convert_table) if whole_converted != pinyin: return _fixed_result(whole_converted) initials = get_initials(pinyin, strict=False) tones = pinyin[len(initials):] initials = _convert_whole(initials, _initial_table) tones = _convert_whole(tones, _tone_table) return _fixed_result('{}{}'.format(initials, tones)) def _fixed_result(pinyin): return pinyin.replace('ü', 'v') def _convert_whole(chars, table): for pair in table: f, r = pair if f == chars: return r return chars register(Style.WADEGILES, func=to_wade_glides)
(re.compile('^v(\d?$)'), 'vI'), ) CYRILLIC_TABLE = dict( zip(u'abwgdEOrZiIklmnopRstufhqcCSHTMUevAV', u'абвгдеёжзийклмнопрстуфхццчшщъьыэюяю')) class CyrillicfoConverter(object): def to_cyrillic(self, pinyin, **kwargs): pinyin = self._pre_convert(pinyin) # 查表替换成注音 for find_re, replace in CYRILLIC_REPLACE: pinyin = find_re.sub(replace, pinyin) pinyin = ''.join(CYRILLIC_TABLE.get(x, x) for x in pinyin) return pinyin def to_cyrillic_first(self, pinyin, **kwargs): pinyin = self.to_cyrillic(pinyin, **kwargs) return pinyin[0] def _pre_convert(self, pinyin): # 用数字表示声调 pinyin = replace_symbol_to_number(pinyin) # 将声调数字移动到最后 return RE_TONE3.sub(r'\1\3\2', pinyin) converter = CyrillicfoConverter() register(Style.CYRILLIC, func=converter.to_cyrillic) register(Style.CYRILLIC_FIRST, func=converter.to_cyrillic_first)
(re.compile('1$'), ''), ) BOPOMOFO_TABLE = dict( zip('bpmfdtnlgkhjqxZCSrzcsiuvaoeEAIOUMNKGR2340', 'ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄧㄨㄩㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦˊˇˋ˙')) class BopomofoConverter(object): def to_bopomofo(self, pinyin, **kwargs): pinyin = self._pre_convert(pinyin) # 查表替换成注音 for find_re, replace in BOPOMOFO_REPLACE: pinyin = find_re.sub(replace, pinyin) pinyin = ''.join(BOPOMOFO_TABLE.get(x, x) for x in pinyin) return pinyin def to_bopomofo_first(self, pinyin, **kwargs): pinyin = self.to_bopomofo(pinyin, **kwargs) return pinyin[0] def _pre_convert(self, pinyin): # 用数字表示声调 pinyin = replace_symbol_to_number(pinyin) # 将声调数字移动到最后 return RE_TONE3.sub(r'\1\3\2', pinyin) converter = BopomofoConverter() register(Style.BOPOMOFO, func=converter.to_bopomofo) register(Style.BOPOMOFO_FIRST, func=converter.to_bopomofo_first)
import pypinyin
from pypinyin.style import register
from .textnorm import textnorm
from .nonstd_pinyin import _nonstd_style
import jieba

jieba.initialize()
# Register _nonstd_style under the name 'nonstd'; _get_pinyin selects that
# name when ``std`` is false.
register('nonstd', _nonstd_style)

def _get_pinyin(text, std=True, pb=False): ''' Params: text: string, normalized sentences std: boolean, standard pinyin style, default: standard pinyin style pb: boolean, prosody label, default: False Returns: pinyin: string ''' if std: style = pypinyin.Style.TONE3 else: style = 'nonstd' text = text.strip() punctuation = ', ' if text[-1:] in list(',.?!'): punctuation = text[-1:] + ' ' text = text[:-1]
has_fi = has_finals(pinyin) # 用数字表示声调 pinyin = replace_symbol_to_number(pinyin) if not has_fi: return pinyin # 获取韵母部分 return get_finals(pinyin, strict=False) def to_finals_tone3(self, pinyin, **kwargs): if kwargs.get('strict'): pinyin = convert_finals(pinyin) has_fi = has_finals(pinyin) # 用数字表示声调 pinyin = replace_symbol_to_number(pinyin) # 将声调数字移动到最后 pinyin = RE_TONE3.sub(r'\1\3\2', pinyin) if not has_fi: return pinyin # 获取韵母部分 return get_finals(pinyin, strict=False) converter = FinalsConverter() register(Style.FINALS, func=converter.to_finals) register(Style.FINALS_TONE, func=converter.to_finals_tone) register(Style.FINALS_TONE2, func=converter.to_finals_tone2) register(Style.FINALS_TONE3, func=converter.to_finals_tone3)