Пример #1
0
    def test_pinyin_segmenter(self):
        """Tests for correct segmentation and tones detection.

        Each case feeds a raw pinyin string to ``segment_pinyin`` and expects
        a tuple of ``(syllable, tone)`` pairs back.  In the expectations
        below, a syllable written without a tone digit carries tone ``0``.
        """
        segmenter = pinyin_table.get_pinyin_segmenter()

        # (input, expected segmentation) pairs, checked in order.
        cases = (
            # No tone digits at all: every syllable is expected with tone 0.
            ('woshangdaxue',
             (('wo', 0), ('shang', 0), ('da', 0), ('xue', 0))),
            # Every syllable followed by its tone digit.
            ('wo1shang2da3xue4',
             (('wo', 1), ('shang', 2), ('da', 3), ('xue', 4))),
            # Toned and untoned syllables mixed in one string.
            ('cheng2zhewei2wang2',
             (('cheng', 2), ('zhe', 0), ('wei', 2), ('wang', 2))),
            # A bare trailing 'r' is expected back as syllable 'er', tone 0.
            ('yi1ge4jin4r',
             (('yi', 1), ('ge', 4), ('jin', 4), ('er', 0))),
            # Non-ASCII 'ü' inside a syllable.
            (u'yi1lü4xu',
             (('yi', 1), (u'lü', 4), (u'xu', 0))),
            # Untoned leading syllable followed by toned ones.
            (u'shangqi3bu4',
             (('shang', 0), (u'qi', 3), (u'bu', 4))),
        )

        for text, expected in cases:
            actual = segmenter.segment_pinyin(text)
            # Spell out the input in the message so a failure inside the
            # loop can be traced back to the exact case that broke.
            self.assertEqual(
                actual,
                expected,
                'segment_pinyin(%r) returned %r, expected %r'
                % (text, actual, expected),
            )
Пример #2
0
 def test_should_fail(self):
     """
     Record a known limitation of the current segmenter.

     Per the original note, the regex segmenter gets 'deniu2' wrong, so
     this test asserts that the result is NOT the desired segmentation
     (('de', 0), ('niu', 2)).  If a better segmenter is built, this
     assertion is expected to start failing.
     """
     pinyin_segmenter = pinyin_table.get_pinyin_segmenter()
     produced = pinyin_segmenter.segment_pinyin('deniu2')
     desired = (('de', 0), ('niu', 2))
     self.assertNotEqual(produced, desired)