def test_pinyin_segmenter(self):
    """Check syllable splitting and tone extraction of the pinyin segmenter.

    Each case pairs a raw pinyin string with the tuple of
    ``(syllable, tone)`` pairs that ``segment_pinyin`` is expected to
    return for it.  In the expected values below, a syllable that is not
    followed by a tone digit carries tone ``0``, and a trailing ``r`` is
    expected to come back as ``('er', 0)``.
    """
    cases = (
        # No tone digits anywhere in the input.
        ('woshangdaxue',
         (('wo', 0), ('shang', 0), ('da', 0), ('xue', 0))),
        # A tone digit after every syllable.
        ('wo1shang2da3xue4',
         (('wo', 1), ('shang', 2), ('da', 3), ('xue', 4))),
        # Toned and untoned syllables mixed.
        ('cheng2zhewei2wang2',
         (('cheng', 2), ('zhe', 0), ('wei', 2), ('wang', 2))),
        # Trailing "r" suffix.
        ('yi1ge4jin4r',
         (('yi', 1), ('ge', 4), ('jin', 4), ('er', 0))),
        # Non-ASCII vowel in a syllable.
        (u'yi1lü4xu',
         (('yi', 1), (u'lü', 4), (u'xu', 0))),
        # NOTE(review): same case as the previous one; it is checked twice
        # in a row in the original test. Kept as is -- confirm whether the
        # repetition is intentional.
        (u'yi1lü4xu',
         (('yi', 1), (u'lü', 4), (u'xu', 0))),
        # Untoned syllable at the start, toned ones after it.
        (u'shangqi3bu4',
         (('shang', 0), (u'qi', 3), (u'bu', 4))),
    )

    segmenter = pinyin_table.get_pinyin_segmenter()
    for raw, expected in cases:
        self.assertEqual(segmenter.segment_pinyin(raw), expected)
def test_should_fail(self):
    """Record a known hard case for the regex segmenter: ``'deniu2'``.

    Asserts that segmenting ``'deniu2'`` does NOT yield
    ``(('de', 0), ('niu', 2))``.

    NOTE(review): the original docstring said this test "should fail with
    the regex segmenter" and asked for a better segmenter to be built.
    Because the check is ``assertNotEqual``, the test passes for as long
    as the segmenter does not produce that split, and would start failing
    once it does -- confirm that this is the intended direction.
    """
    de_niu_split = (('de', 0), ('niu', 2))
    segmenter = pinyin_table.get_pinyin_segmenter()
    actual = segmenter.segment_pinyin('deniu2')
    self.assertNotEqual(actual, de_niu_split)