Exemplo n.º 1
0
def test_pre_convert_style_return_value():
    """A value returned by ``pre_convert_style`` ends up in the result.

    The stock converter yields the TONE2 pinyin for each character, while
    a subclass whose ``pre_convert_style`` returns ``'test'`` yields that
    string for every character instead.
    """
    class A(DefaultConverter):
        def pre_convert_style(self, han, orig_pinyin, style, strict, **kwargs):
            return 'test'

    han = '测试'
    # Same positional arguments are used for both converters.
    convert_args = (han, Style.TONE2, False, 'ignore', True)

    baseline = DefaultConverter().convert(*convert_args)
    assert baseline == [['ce4'], ['shi4']]

    overridden = A().convert(*convert_args)
    assert overridden == [['test'], ['test']]
Exemplo n.º 2
0
def test_post_handle_nopinyin_return_value():
    """A value returned by ``post_handle_nopinyin`` ends up in the result.

    With ``errors='default'`` the stock converter passes the non-pinyin
    input through unchanged; a subclass whose ``post_handle_nopinyin``
    returns ``'abc'`` yields that string instead.
    """
    class A(DefaultConverter):
        def post_handle_nopinyin(self, chars, style, heteronym, errors, strict,
                                 pinyin, **kwargs):
            return 'abc'

    han = 'test'
    # Same positional arguments are used for both converters.
    convert_args = (han, Style.TONE2, False, 'default', True)

    baseline = DefaultConverter().convert(*convert_args)
    assert baseline == [['test']]

    overridden = A().convert(*convert_args)
    assert overridden == [['abc']]
Exemplo n.º 3
0
def test_post_pinyin_return_value_phrase_pinyin():
    """A value returned by ``post_pinyin`` replaces the looked-up pinyin.

    The subclass maps both the whole input and each of its characters to
    replacement pinyin, and the final result is expected to be those
    replacements rendered in TONE3 style.
    """
    class A(DefaultConverter):
        def post_pinyin(self, han, heteronym, pinyin, **kwargs):
            # Built on every call so each invocation returns fresh lists.
            replacements = {
                '北': [['zhāo']],
                '京': [['yáng']],
                '北京': [['zhāo'], ['yáng']],
            }
            return replacements[han]

    han = '北京'
    # Same positional arguments are used for both converters.
    convert_args = (han, Style.TONE3, False, 'ignore', True)

    baseline = DefaultConverter().convert(*convert_args)
    assert baseline == [['bei3'], ['jing1']]

    overridden = A().convert(*convert_args)
    assert overridden == [['zhao1'], ['yang2']]
Exemplo n.º 4
0
def test_post_pinyin_return_value_single_pinyin():
    """A value returned by ``post_pinyin`` replaces the looked-up pinyin.

    The subclass maps both the whole input and each of its characters to
    replacement pinyin, and the final result is expected to be those
    replacements rendered in TONE3 style.
    """
    class A(DefaultConverter):
        def post_pinyin(self, han, heteronym, pinyin, **kwargs):
            # Built on every call so each invocation returns fresh lists.
            replacements = {
                '测': [['zhāo']],
                '试': [['yáng']],
                '测试': [['zhāo'], ['yáng']],
            }
            return replacements[han]

    han = '测试'
    # Same positional arguments are used for both converters.
    convert_args = (han, Style.TONE3, False, 'ignore', True)

    baseline = DefaultConverter().convert(*convert_args)
    assert baseline == [['ce4'], ['shi4']]

    overridden = A().convert(*convert_args)
    assert overridden == [['zhao1'], ['yang2']]
Exemplo n.º 5
0
 def __init__(self, converter=None, **kwargs):
     """Store the converter this object will use.

     :param converter: converter instance to use; when it is falsy
                       (including the default ``None``) a new
                       ``DefaultConverter`` is created instead
     :param kwargs: accepted but not used by this method
     """
     if not converter:
         converter = DefaultConverter()
     self._converter = converter
Exemplo n.º 6
0
        """对字符串进行分词后将调用 ``post_seg`` 方法对分词后的结果做处理。

        默认原样返回传入的 ``seg_data``。

        如果这个方法的返回值类型是 ``list``,表示对分词结果做了二次处理,此时,
        ``seg`` 方法将以这个返回的数据作为返回值。

        :param hans: 分词前的字符串
        :param seg_data: 分词后的结果
        :type seg_data: list
        :return: ``None`` or ``list``
        """
        pass


# Module-level default instances: a ``DefaultConverter`` and a ``Pinyin``
# object constructed with that same converter.
_default_convert = DefaultConverter()
_default_pinyin = Pinyin(_default_convert)


def to_fixed(pinyin, style, strict=True):
    """Convert ``pinyin`` to ``style`` via the module-level default converter.

    Kept for backward compatibility. TODO: deprecate.

    :param pinyin: pinyin string to convert; also passed as the ``default``
                   argument of ``convert_style``
    :param style: target style, forwarded to ``convert_style``
    :param strict: forwarded to ``convert_style``
    :return: the value returned by ``convert_style``
    """
    options = {'style': style, 'strict': strict, 'default': pinyin}
    # The first positional argument (the original characters) is left empty.
    return _default_convert.convert_style('', pinyin, **options)


# Underscore-prefixed alias bound to the same function object as ``to_fixed``.
_to_fixed = to_fixed


def handle_nopinyin(chars, errors='default', heteronym=True):
    # 用于向后兼容,TODO: 废弃
    return _default_convert.handle_nopinyin(