Пример #1
0
    def build(self, data):
        """Build a trie of chunk lists from the text file at ``data``.

        The file's lines are handed to ``VSSChunkMiner.build``; the result is
        iterated for its keys (words) and indexed by each key to obtain the
        items stored for that word. Every item of a non-empty word is
        appended to the ``content`` of that word's trie node.

        Args:
            data: Path of the file to read; passed straight to ``open``.

        Returns:
            The populated ``Trie``.
        """
        # Collect the raw lines that the chunker will work on.
        with open(data, "r") as handle:
            lines = handle.readlines()

        # Word-indexed chunk lists produced by the chunker.
        words = VSSChunkMiner().build(lines)

        trie = Trie()
        for word in words:
            # Empty keys are never inserted into the trie.
            if word == '':
                continue

            subtree = trie.getSubtree(word)
            # Identity check: a custom __eq__ must not be able to fake "missing".
            if subtree is None:
                # Nothing stored under this word yet: attach a fresh node.
                node = TrieNode()
                trie.addSubtree(word, node)
            else:
                # Word already present: reuse the root node of its subtree.
                node = subtree.root

            for item in words[word]:
                node.content.append(item)

        return trie
Пример #2
0
class TrieMiner:
    """Builds a ``Trie`` from the words exposed by an ``SRTChunker``."""

    def __init__(self, filename):
        """Chunk ``filename`` and register one trie node per non-empty word.

        Each node receives a single content entry: the value the chunker
        holds under that word.
        """
        source = SRTChunker(filename)
        self.trie = Trie()
        for key in source.words:
            # Empty keys are skipped entirely.
            if key == '':
                continue
            node = TrieNode()
            self.trie.addSubtree(key, node)
            node.content.append(source.words[key])

    def getTrie(self):
        """Return the trie assembled by the constructor."""
        return self.trie
Пример #3
0
    def build(self, data):
        """Build a trie from a word-indexed collection of item lists.

        ``data`` is iterated for its keys (words) and indexed by each key to
        obtain the items stored for that word. Every item of a non-empty word
        is appended to the ``content`` of that word's trie node.

        Args:
            data: Word-indexed container; must support iteration over its
                keys and ``data[word]`` lookup yielding an iterable.

        Returns:
            The populated ``Trie``.
        """
        trie = Trie()
        for word in data:
            # Empty keys are never inserted into the trie.
            if word == '':
                continue

            subtree = trie.getSubtree(word)
            # Identity check: a custom __eq__ must not be able to fake "missing".
            if subtree is None:
                # Nothing stored under this word yet: attach a fresh node.
                node = TrieNode()
                trie.addSubtree(word, node)
            else:
                # Word already present: reuse the root node of its subtree.
                node = subtree.root

            for item in data[word]:
                node.content.append(item)

        return trie