def test_pre_convert_style_return_value():
    """Check that the value returned by ``pre_convert_style`` ends up in the
    converted output in place of the normally converted pinyin."""

    class FixedStyleConverter(DefaultConverter):
        """Converter whose ``pre_convert_style`` hook always returns 'test'."""

        def pre_convert_style(self, han, orig_pinyin, style, strict,
                              **kwargs):
            return 'test'

    text = '测试'
    # Both converters are driven with exactly the same positional arguments.
    call_args = (text, Style.TONE2, False, 'ignore', True)

    stock_output = DefaultConverter().convert(*call_args)
    assert stock_output == [['ce4'], ['shi4']]

    hooked_output = FixedStyleConverter().convert(*call_args)
    assert hooked_output == [['test'], ['test']]
def test_post_handle_nopinyin_return_value():
    """Check that the value returned by ``post_handle_nopinyin`` replaces the
    default result for input that has no pinyin."""

    class FixedNoPinyinConverter(DefaultConverter):
        """Converter whose ``post_handle_nopinyin`` hook always returns 'abc'."""

        def post_handle_nopinyin(self, chars, style, heteronym, errors,
                                 strict, pinyin, **kwargs):
            return 'abc'

    text = 'test'
    # Both converters are driven with exactly the same positional arguments.
    call_args = (text, Style.TONE2, False, 'default', True)

    stock_output = DefaultConverter().convert(*call_args)
    assert stock_output == [['test']]

    hooked_output = FixedNoPinyinConverter().convert(*call_args)
    assert hooked_output == [['abc']]
def test_post_pinyin_return_value_phrase_pinyin():
    """Check that pinyin returned by ``post_pinyin`` overrides the stock
    result for '北京' and is still rendered in the requested style."""

    class OverridingConverter(DefaultConverter):
        """Converter whose ``post_pinyin`` hook answers from a fixed table."""

        def post_pinyin(self, han, heteronym, pinyin, **kwargs):
            # Built on every call so each lookup hands back fresh lists.
            replacements = {
                '北': [['zhāo']],
                '京': [['yáng']],
                '北京': [['zhāo'], ['yáng']],
            }
            return replacements[han]

    text = '北京'
    # Both converters are driven with exactly the same positional arguments.
    call_args = (text, Style.TONE3, False, 'ignore', True)

    stock_output = DefaultConverter().convert(*call_args)
    assert stock_output == [['bei3'], ['jing1']]

    hooked_output = OverridingConverter().convert(*call_args)
    assert hooked_output == [['zhao1'], ['yang2']]
def test_post_pinyin_return_value_single_pinyin():
    """Check that pinyin returned by ``post_pinyin`` overrides the stock
    result for '测试' and is still rendered in the requested style."""

    class OverridingConverter(DefaultConverter):
        """Converter whose ``post_pinyin`` hook answers from a fixed table."""

        def post_pinyin(self, han, heteronym, pinyin, **kwargs):
            # Built on every call so each lookup hands back fresh lists.
            replacements = {
                '测': [['zhāo']],
                '试': [['yáng']],
                '测试': [['zhāo'], ['yáng']],
            }
            return replacements[han]

    text = '测试'
    # Both converters are driven with exactly the same positional arguments.
    call_args = (text, Style.TONE3, False, 'ignore', True)

    stock_output = DefaultConverter().convert(*call_args)
    assert stock_output == [['ce4'], ['shi4']]

    hooked_output = OverridingConverter().convert(*call_args)
    assert hooked_output == [['zhao1'], ['yang2']]
def __init__(self, converter=None, **kwargs):
    """Store the converter to use on ``self._converter``.

    :param converter: converter object to store; any falsy value
        (including the default ``None``) selects a newly created
        ``DefaultConverter()`` instead.
    :param kwargs: accepted but not used by this method.
    """
    if not converter:
        converter = DefaultConverter()
    self._converter = converter
"""对字符串进行分词后将调用 ``post_seg`` 方法对分词后的结果做处理。 默认原样返回传入的 ``seg_data``。 如果这个方法的返回值类型是 ``list``,表示对分词结果做了二次处理,此时, ``seg`` 方法将以这个返回的数据作为返回值。 :param hans: 分词前的字符串 :param seg_data: 分词后的结果 :type seg_data: list :return: ``None`` or ``list`` """ pass _default_convert = DefaultConverter() _default_pinyin = Pinyin(_default_convert) def to_fixed(pinyin, style, strict=True): # 用于向后兼容,TODO: 废弃 return _default_convert.convert_style( '', pinyin, style=style, strict=strict, default=pinyin) _to_fixed = to_fixed def handle_nopinyin(chars, errors='default', heteronym=True): # 用于向后兼容,TODO: 废弃 return _default_convert.handle_nopinyin(