def test_NRPattern(self):
    """Check exact-match lookups on a trie built from the ``NRPattern`` keys.

    The test expects ``exact_match_search`` to return something other than
    ``-1`` for the patterns "BCD", "BBCD", "BG" and "DG", and exactly ``-1``
    for "CD".
    """
    trie = DoubleArrayTrie()
    # Sorted in place before building.
    # NOTE(review): presumably build() requires ordered keys -- confirm.
    NRPattern.sort()
    trie.build(key=NRPattern)

    # assertNotEqual/assertEqual report the offending value on failure,
    # unlike assertTrue(<comparison>), which only says "False is not true".
    for pattern in ("BCD", "BBCD", "BG", "DG"):
        self.assertNotEqual(
            trie.exact_match_search(pattern), -1,
            "pattern %r should be found in the trie" % pattern)
    self.assertEqual(
        trie.exact_match_search("CD"), -1,
        "pattern 'CD' should not be found in the trie")
def test_combin_by_dict(self):
    """Check that a custom dictionary merges the word-net vertices.

    A word net is generated for a four-character text from a trie holding
    each character as its own key. The first vertex of every row is then
    passed through ``combine_by_custom_dict`` with a second trie whose only
    key is the full four-character text.
    """
    dat = DoubleArrayTrie()
    # Every key carries the u"" prefix: a bare "海" is a byte string on
    # Python 2 (without unicode_literals) and would not match the unicode
    # text below. On Python 3 the prefix is a no-op.
    dat.build([u"江", u"河", u"湖", u"海"])
    text = u"江河湖海"
    word_net = WordNet(text)
    gen_word_net(text, word_net, dat)
    # Keep only the first vertex of each row in the word net.
    vertexs = [v[0] for v in word_net.vertexs]
    # Failure message reads: "custom dictionary segmentation".
    # NOTE(review): 6 is presumably 4 characters plus begin/end nodes -- confirm.
    self.assertEqual(len(word_net), 6, u"自定义字典分词")

    combin_dat = DoubleArrayTrie()
    combin_dat.build(key=[u"江河湖海"], v=[u"江河湖海 n 1"])
    vertexs = combine_by_custom_dict(vertexs, combin_dat)
    # Failure message reads: "after merging only the head, the tail and the
    # middle word should remain".
    self.assertEqual(len(vertexs), 3, u"合并完成后应该只有前尾加中间词")
def __init__(self):
    """Create ``self.trie`` and build it from the ``NSPattern`` keys.

    ``NSPattern`` is sorted in place as a side effect.
    """
    pattern_trie = self.trie = DoubleArrayTrie()
    # NOTE(review): presumably build() requires ordered keys -- confirm.
    NSPattern.sort()
    pattern_trie.build(key=NSPattern)
def __init__(self):
    """Load ``self.trie`` and ``self.matrix`` from the configured locations.

    The trie is loaded from ``config.PLACE_DICT_NAME`` with ``NS`` passed as
    ``enum_cls``; the matrix is loaded from ``config.PLACE_TR_PATH`` with
    ``NS`` as its second argument.
    """
    dict_name = config.PLACE_DICT_NAME
    self.trie = DoubleArrayTrie.load(dict_name, enum_cls=NS)

    matrix_path = config.PLACE_TR_PATH
    self.matrix = HMMMatrix.load(matrix_path, NS)
class NRPatternDict:
    """Holds a ``DoubleArrayTrie`` built from the ``NRPattern`` keys."""

    def __init__(self):
        """Create ``self.trie`` and build it from ``NRPattern``.

        ``NRPattern`` is sorted in place as a side effect.
        """
        pattern_trie = self.trie = DoubleArrayTrie()
        # NOTE(review): presumably build() requires ordered keys -- confirm.
        NRPattern.sort()
        pattern_trie.build(key=NRPattern)
def __init__(self):
    """Load ``self.trie`` from ``config.TRADITIONAL_CHINESE_DICT_NAME``.

    Two callables are handed to ``DoubleArrayTrie.load``: one returning the
    text before the first '=' of an entry, one splitting the entry on '='.
    ``get_attr`` on the loaded trie is then replaced by an identity function.
    """
    def text_before_equals(entry):
        # NOTE(review): if an entry has no '=', find() returns -1 and this
        # slice drops the last character -- confirm entries always have one.
        return entry[:entry.find(u'=')]

    def split_on_equals(entry):
        return entry.split('=')

    def passthrough(value):
        return value

    # NOTE(review): the two callables are presumably the key extractor and
    # the value parser -- confirm against DoubleArrayTrie.load.
    self.trie = DoubleArrayTrie.load(
        config.TRADITIONAL_CHINESE_DICT_NAME,
        text_before_equals,
        split_on_equals,
    )
    # Set on the instance, so it is called without an implicit ``self``.
    self.trie.get_attr = passthrough
def __init__(self):
    """Load ``self.trie`` from ``config.TRADITIONAL_CHINESE_DICT_NAME``, reversed.

    Two callables are handed to ``DoubleArrayTrie.load``: one returning the
    text after the first '=' of an entry, one splitting the entry on '=' and
    reversing the parts. ``config.DICT_BIN_REVERSE_EXT`` is passed as
    ``dict_bin_ext``. ``get_attr`` on the loaded trie is then replaced by an
    identity function.
    """
    def text_after_equals(entry):
        # With no '=' present, find() returns -1 and the slice starts at 0,
        # so the whole entry is returned.
        return entry[entry.find(u'=') + 1:]

    def split_on_equals_reversed(entry):
        return entry.split('=')[::-1]

    def passthrough(value):
        return value

    # NOTE(review): the two callables are presumably the key extractor and
    # the value parser -- confirm against DoubleArrayTrie.load.
    self.trie = DoubleArrayTrie.load(
        config.TRADITIONAL_CHINESE_DICT_NAME,
        text_after_equals,
        split_on_equals_reversed,
        dict_bin_ext=config.DICT_BIN_REVERSE_EXT,
    )
    # Set on the instance, so it is called without an implicit ``self``.
    self.trie.get_attr = passthrough