Пример #1
0
    def test_NRPattern(self):
        """Check exact-match lookups on a trie built from ``NRPattern``.

        ``NRPattern`` is sorted in place and then handed to
        ``DoubleArrayTrie.build`` as the key list.  Four patterns are
        expected to be found (``exact_match_search`` returns something
        other than ``-1``), while ``"CD"`` is expected to be reported as
        missing (``-1``).
        """
        trie = DoubleArrayTrie()
        NRPattern.sort()
        trie.build(key=NRPattern)

        # assertNotEqual/assertEqual receive the looked-up value itself, so
        # a failure is reported in terms of that value rather than as a
        # bare "False is not true".
        for pattern in ("BCD", "BBCD", "BG", "DG"):
            self.assertNotEqual(trie.exact_match_search(pattern), -1,
                                "%s should be found in the trie" % pattern)
        self.assertEqual(trie.exact_match_search("CD"), -1)
Пример #2
0
    def test_combin_by_dict(self):
        """Check that a custom-dictionary entry merges the word-net vertexes.

        A trie holding the four single characters of the text is used to
        fill a ``WordNet``; the net is expected to have length 6.  A second
        trie whose only key is the whole text is then passed to
        ``combine_by_custom_dict``, after which 3 vertexes are expected to
        remain.
        """
        dat = DoubleArrayTrie()
        # Every key carries the ``u`` prefix.  The last one used to be a
        # plain literal, which on Python 2 is a byte string and would not
        # be the same kind of key as the unicode text being segmented.
        dat.build([u"江", u"河", u"湖", u"海"])
        text = u"江河湖海"
        word_net = WordNet(text)
        gen_word_net(text, word_net, dat)
        # Keep only the first candidate of each word-net row.
        vertexs = [v[0] for v in word_net.vertexs]
        self.assertEqual(len(word_net), 6, u"自定义字典分词")

        combin_dat = DoubleArrayTrie()
        combin_dat.build(key=[u"江河湖海"], v=[u"江河湖海 n 1"])
        vertexs = combine_by_custom_dict(vertexs, combin_dat)
        self.assertEqual(len(vertexs), 3, u"合并完成后应该只有前尾加中间词")
Пример #3
0
    def test_combin_by_dict(self):
        """Check that a custom-dictionary entry merges the word-net vertexes.

        A trie holding the four single characters of the text is used to
        fill a ``WordNet``; the net is expected to have length 6.  A second
        trie whose only key is the whole text is then passed to
        ``combine_by_custom_dict``, after which 3 vertexes are expected to
        remain.
        """
        dat = DoubleArrayTrie()
        # Every key carries the ``u`` prefix.  The last one used to be a
        # plain literal, which on Python 2 is a byte string and would not
        # be the same kind of key as the unicode text being segmented.
        dat.build([u"江", u"河", u"湖", u"海"])
        text = u"江河湖海"
        word_net = WordNet(text)
        gen_word_net(text, word_net, dat)
        # Keep only the first candidate of each word-net row.
        vertexs = [v[0] for v in word_net.vertexs]
        self.assertEqual(len(word_net), 6, u"自定义字典分词")

        combin_dat = DoubleArrayTrie()
        combin_dat.build(key=[u"江河湖海"], v=[u"江河湖海 n 1"])
        vertexs = combine_by_custom_dict(vertexs, combin_dat)
        self.assertEqual(len(vertexs), 3, u"合并完成后应该只有前尾加中间词")
Пример #4
0
 def __init__(self):
     """Create ``self.trie`` and build it from the ``NSPattern`` keys.

     ``NSPattern`` is sorted in place before being passed to ``build``.
     """
     trie = DoubleArrayTrie()
     self.trie = trie
     NSPattern.sort()
     trie.build(key=NSPattern)
Пример #5
0
 def __init__(self):
     """Load ``self.trie`` and ``self.matrix`` from the locations in ``config``.

     The trie is loaded from ``config.PLACE_DICT_NAME`` with ``NS`` as the
     ``enum_cls`` argument; the matrix is loaded from
     ``config.PLACE_TR_PATH`` with ``NS`` as its second argument.
     """
     dict_name = config.PLACE_DICT_NAME
     self.trie = DoubleArrayTrie.load(dict_name, enum_cls=NS)
     matrix_path = config.PLACE_TR_PATH
     self.matrix = HMMMatrix.load(matrix_path, NS)
Пример #6
0
class NRPatternDict:
    """Holds a ``DoubleArrayTrie`` built from the ``NRPattern`` keys."""

    def __init__(self):
        """Create ``self.trie`` and build it from ``NRPattern``.

        ``NRPattern`` is sorted in place before being passed to ``build``.
        """
        trie = DoubleArrayTrie()
        self.trie = trie
        NRPattern.sort()
        trie.build(key=NRPattern)
Пример #7
0
 def __init__(self):
     """Load ``self.trie`` from ``config.TRADITIONAL_CHINESE_DICT_NAME``.

     Two callables are passed positionally to ``DoubleArrayTrie.load``:
     the first returns the part of an entry before its first ``=``, the
     second splits the entry on ``=``.  How ``load`` uses them is not
     visible here.  The trie's ``get_attr`` is then replaced with an
     identity function.
     """
     def text_before_equals(i):
         # find() returns -1 when there is no '=', in which case the slice
         # drops the last character of the entry.
         return i[:i.find(u'=')]

     def split_on_equals(i):
         return i.split('=')

     def identity(v):
         return v

     dict_name = config.TRADITIONAL_CHINESE_DICT_NAME
     self.trie = DoubleArrayTrie.load(dict_name,
                                      text_before_equals,
                                      split_on_equals)
     self.trie.get_attr = identity
Пример #8
0
 def __init__(self):
     """Load ``self.trie`` from ``config.TRADITIONAL_CHINESE_DICT_NAME``.

     Two callables are passed positionally to ``DoubleArrayTrie.load``:
     the first returns the part of an entry after its first ``=``, the
     second splits the entry on ``=`` and reverses the pieces.
     ``config.DICT_BIN_REVERSE_EXT`` is passed as ``dict_bin_ext``.  How
     ``load`` uses these is not visible here.  The trie's ``get_attr`` is
     then replaced with an identity function.
     """
     def text_after_equals(i):
         # find() returns -1 when there is no '=', so the slice starts at 0
         # and the whole entry is returned.
         return i[i.find(u'=') + 1:]

     def split_reversed(i):
         parts = i.split('=')
         parts.reverse()
         return parts

     def identity(v):
         return v

     dict_name = config.TRADITIONAL_CHINESE_DICT_NAME
     self.trie = DoubleArrayTrie.load(dict_name,
                                      text_after_equals,
                                      split_reversed,
                                      dict_bin_ext=config.DICT_BIN_REVERSE_EXT)
     self.trie.get_attr = identity