def test_person_name_V_should_split_to_EL_DL(self):
    """Check that three person names appear as terms of the segmented sentence.

    The sentence is run through ``segment.seg_to_vertexs`` and the result is
    converted with ``segment.vertexs_to_terms(..., True)``. Each expected
    name must be a member of the returned terms; nothing is asserted about
    the other terms or about ordering.
    """
    # NOTE(review): the sentence looks chosen so that the characters after a
    # name could be glued onto it (e.g. "邓颖超" followed by "生前") -- confirm
    # this is the case the "V ... EL_DL" part of the test name refers to.
    sentence = u"龚学平、张晓辉等领导说,邓颖超生前杜绝超生"
    terms = segment.vertexs_to_terms(segment.seg_to_vertexs(sentence), True)

    for person_name in (u"龚学平", u"张晓辉", u"邓颖超"):
        self.assertIn(person_name, terms)
def test_viterbi(self):
    """Check that the Viterbi path over the word net yields the expected terms.

    A ``WordNet`` is created for the sentence and populated by
    ``gen_word_net``. ``viterbi`` is then run on ``word_net.vertexs`` and the
    result is converted with ``vertexs_to_terms(..., True)``.

    Only membership is asserted: every expected term must be present in the
    result. Order and the absence of extra terms are not checked.
    """
    text = u"工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作"
    word_net = WordNet(text)
    gen_word_net(text, word_net)
    terms = vertexs_to_terms(viterbi(word_net.vertexs), True)

    # Listed in sentence order; checked in this order so the first missing
    # term is the one reported.
    expected_terms = (
        u"工信处",
        u"女",
        u"干事",
        u"每月",
        u"经过",
        u"下属",
        u"科室",
        u"都",
        u"要",
        u"亲口",
        u"交代",
        u"24",
        u"口",
        u"交换机",
        u"等",
        u"技术性",
        u"器件",
        u"的",
        u"安装",
        u"工作",
    )
    for expected in expected_terms:
        # assertIn reports the missing term and the container on failure,
        # unlike assertTrue(x in y), which only says "False is not true".
        self.assertIn(expected, terms)