Пример #1
0
class TrieController():
	"""Connect a word ``Trie`` to a ``TrieDemoView``.

	On construction the trie is filled from ``words.txt`` and the view's
	``textEntered`` signal is wired to ``handleTextChanged``.
	"""

	def __init__(self):
		self.trie = Trie()
		self.initTrie()
		self.view = TrieDemoView()
		# Refresh the suggestions every time the view reports new text.
		self.view.textEntered.connect(self.handleTextChanged)

	def handleTextChanged(self, stringEntered):
		"""Show the trie's word list for ``stringEntered`` in the view.

		Does nothing (returns ``None``) when the entered text is empty.
		"""
		if len(stringEntered) == 0:
			return
		# Single-argument print() behaves the same on Python 2 and Python 3.
		print("word list for " + stringEntered)
		words = self.trie.getWordList(str(stringEntered))
		# One suggestion per line, each terminated by a newline.
		self.view.setSuggestions("".join(word + "\n" for word in words))

	def initTrie(self):
		"""Add every line of ``words.txt`` to the trie and print the count."""
		count = 0
		# The context manager closes the file even if addWord() raises.
		with open("words.txt", 'r') as f:
			for line in f:
				# NOTE(review): the line is passed with its trailing newline,
				# exactly as before -- confirm addWord() expects that.
				self.trie.addWord(line)
				count += 1
		print("added " + str(count) + " words")

	def getView(self):
		"""Return the view object created in ``__init__``."""
		return self.view
Пример #2
0
    def __init__(self, parent, *args, **kwargs):
        """Create the editor frame inside ``parent``.

        ``parent`` is stored as ``self.root``; any extra positional and
        keyword arguments are forwarded to ``ttk.Frame.__init__``.
        """
        ttk.Frame.__init__(self, parent, *args, **kwargs)
        self.root = parent

        # Trie used as the dictionary.
        self.dictionary = Trie()

        # Text widget, placed on the grid of the root window.
        text_widget = Text(self.root, width=100, height=40)
        self.text = text_widget
        text_widget.grid(column=0, row=2)

        # Menu set-up.
        self.init_gui()

        # Event sequences on the text widget and the handlers they trigger.
        for sequence, handler in (
                ("<space>", self.spell_check),
                ('<Button-3>', self.autocorrect),
                ('<Key>', self.typing_suggestions),
        ):
            self.text.bind(sequence, handler)

        # No file is associated with the editor yet.
        self.filename = ''

        # Text tags: red underline for misspelt words, red/white background
        # to highlight or un-highlight found words.
        for tag_name, options in (
                ('underline', {'foreground': 'red', 'underline': True}),
                ('highlight', {'background': 'red'}),
                ('unhighlight', {'background': 'white'}),
        ):
            self.text.tag_configure(tag_name, **options)

        # Pop-up menus: one for right click, one for text statistics.
        self.pop_menu = tkinter.Menu(parent, tearoff=0)
        self.pop_stats = tkinter.Menu(parent, tearoff=0)
Пример #3
0
def create_trie(post_list):
    """Build a character trie from every distinct word in ``post_list``.

    Args:
        post_list: mapping whose keys are document IDs and whose values are
            the words in that document.

    Returns:
        The root ``Trie.Node``. In every node ``children_letters`` is kept
        sorted and ``children_nodes`` holds the child for each letter at the
        same position; the node reached by the last character of a word has
        ``is_terminal`` set to True.
    """
    vocabulary = {word for words in post_list.values() for word in words}

    root = Trie.Node('', [], [])
    for word in vocabulary:
        node = root
        for char in word:
            index = Trie.find(node.children_letters, char)
            if index != -1:
                # Prefix already present: just descend.
                node = node.children_nodes[index]
                continue
            # New prefix: remember the sorted slot, then add the letter and
            # its node at that same slot so both lists stay aligned.
            slot = bisect.bisect_left(node.children_letters, char)
            bisect.insort(node.children_letters, char)
            child = Trie.Node(char, [], [])
            node.children_nodes.insert(slot, child)
            node = child
        if word:
            # ``node`` is now the node of the word's final character.
            node.is_terminal = True

    return root
Пример #4
0
def GetASResult(query):
    """Return up to five suggested rewrites of ``query``.

    The query is stripped, lower-cased and tokenized. A stop word, or a token
    whose lemma lookup reports success in its first field, is appended
    unchanged to every partial result. Otherwise each partial result is
    extended with each of (at most) the five shortest entries returned by
    ``Trie.GetListofWords(data[4], 0, 3)``.

    Args:
        query: the raw query string.

    Returns:
        A list of at most five suggestion strings.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the supported
    # clock for measuring elapsed intervals.
    start = time.perf_counter()
    # Tokens come from already lower-cased text, so no per-token lower().
    tokens = nltk.word_tokenize(query.strip().lower())
    result = [""]
    for token in tokens:
        candidates = None
        if token not in stops:
            data = Trie.GetNearestMatchFromTrie(wnl.lemmatize(token))
            if not data[0]:
                candidates = sorted(Trie.GetListofWords(data[4], 0, 3),
                                    key=lambda x: len(x[0]))[:5]
        if candidates is None:
            result = [item + " " + token for item in result]
        else:
            # Cross product: every partial result with every candidate word.
            result = [item + " " + word[0]
                      for item in result for word in candidates]
    # Each suggestion was built with a leading separator; drop it.
    result = [item.lstrip() for item in result]

    timetaken = time.perf_counter() - start
    print("Fetched in ", timetaken, " secs")
    return result[:5]
Пример #5
0
    def __init__(self):
        """Set up the vocabularies, the stopword list and the lookup trie.

        ``self.positives``, ``self.negatives`` and ``self.features`` are the
        phrase lists; ``self.stopwords`` is NLTK's English stopword list with
        every phrase from those three lists removed; ``self.trie`` maps each
        phrase to the label 'Positive', 'Negative' or 'Features'.
        """
        # A missing comma between adjacent string literals silently joins
        # them, so every entry below is explicitly comma-separated.
        self.positives = ["good", "gud", "well", "great", "decent", "amazing", "excellent", "sexy", "superb", "suburb",
                          "awesome", "awsm", "nice", "happy", "high", "average",
                          "fast", "quick", "immersing", "immersive", "premium",
                          "best", "better",
                          "perfect", "perfection", "beast", "great", "fantastic", "faster", "fabulous", "blazing",
                          "loved", "love",
                          "marvellous", "comfortable", "unbeatable",
                          "charge", "smooth", "beautifully", "beautiful", "superb"]
        self.negatives = ["very bad", "bad", "disappointment", "wrong", "never", "slow", "no", "not good", "not work",
                          "not", "terrible", "heavy",
                          "issue", "defect", "slowest", "lags", "waste", "doesn't work",
                          "doesnt work", "doesnt", "doesn't",
                          "problem", "sucks", "worst",
                          "pathetic", "not good", "not very good", "ineffective", "poor", "not success"]
        self.features = ["phone", "phones", "device", "product", "mobile", "look",
                         "front camera", "back camera", "rear camera", "camera", "selfie", "front",
                         "photos", "pictures", "video", "images", "lowlight pictures", "lowlight",
                         "fingerprint", "fingerprints", "finger print", "finger lock", "fingerlock", "finger", "touch",
                         "display", "hd", "design", "build", "performance", "gorilla glass", "screen", "super amoled",
                         "amoled", "notch", "gorrilla glass",
                         "battery", "backup", "charging", "charge", "charger",
                         "connectivity", "network",
                         "face unlock", "face", "face recognition",
                         "sound", "headset", "headphones", "audio", "speakers", "speaker",
                         "water resistance", "water",
                         "notifications light", "notification light",
                         "adaptive brightness sensor", "sensor", "call quality", "call", "nfc", "wifi", "bluetooth",
                         "other devices",
                         "one ui", "one-ui", "ui", "os", "color os", "coloros", "miui",
                         "pubg", "gaming", "games", "color", "ram", "memory",
                         "heating", "price", "cost", "value for money", "budget", "affordable",
                         "processor", "chipset", "cpu", "speed", "bloatware",
                         "microsd", "micro sd", "storage",
                         "weight"]

        # STOPWORDS
        # Words that carry sentiment or name a feature must not be treated as
        # stopwords (e.g. "no", "not"), so they are filtered out of the list.
        vocabulary = set(self.positives) | set(self.negatives) | set(self.features)
        self.stopwords = [
            word for word in nltk.corpus.stopwords.words('english')
            if word not in vocabulary
        ]

        # Insertion order (positives, negatives, features) is kept as before.
        self.trie = Trie()
        for phrases, label in (
                (self.positives, 'Positive'),
                (self.negatives, 'Negative'),
                (self.features, 'Features'),
        ):
            for phrase in phrases:
                self.trie.insert(phrase, label)
Пример #6
0
def init():
    """Prepare every question in ``allQues`` and register it in the trie.

    First pass: each question's tips are set to its title, and its spell
    string is set to the first character of every element returned by
    ``lazy_pinyin`` for that title. Second pass: each question's number and
    spell string are handed to ``Trie.addTrieOne``.
    """
    for question in allQues:
        title = question.getTitle()
        question.setTips(title)
        # Append the first letter of each pinyin element.
        initials = "".join(syllable[0] for syllable in lazy_pinyin(title))
        question.setSpell(initials)
    for question in allQues:
        Trie.addTrieOne(question.getNum(), question.getSpell())
Пример #7
0
def GetResult(query, rankingType):
    """Look up the lemmatized query tokens and return the ranked results.

    Args:
        query: the raw query string; it is stripped, lower-cased, tokenized
            and lemmatized.
        rankingType: forwarded unchanged to ``conflatedDocids``.

    Returns:
        The value produced by the ranking pipeline when at least one token
        was found, otherwise ``None`` (after printing "No results").
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the supported
    # clock for measuring elapsed intervals.
    start = time.perf_counter()
    tokens = [wnl.lemmatize(token)
              for token in nltk.word_tokenize(query.strip().lower())]

    result = []
    for position, word in enumerate(tokens):
        data = Trie.GetNearestMatchFromTrie(word)
        # data[0] -> bool result
        # data[1][0] -> number of repositories found in.
        # data[1][1] -> list of repositories
        #               x -> repo
        #                   name, freq, wtfidf
        if data[0]:
            result.append((word, data[1][1], position))

    finalresult = None
    if result:
        # Ranking based on TF-IDF/Cosine similarity
        finalresult = resultsList(
            GetTopLibraries(
                rankResults(normalize(conflatedDocids(result, rankingType)))))

    timetaken = time.perf_counter() - start
    print("Fetched in ", timetaken, " secs")
    if not result:
        print("No results")
    return finalresult
Пример #8
0
def creattree():
    """Return a ``Trie.Trie`` holding every entry produced by ``opendict()``."""
    entries = opendict()
    tree = Trie.Trie()
    for entry in entries:
        tree.insert(entry)
    return tree
Пример #9
0
def tests():
    """Check the empty root and the structures returned by ``Trie.add``."""
    trie = Trie.Trie()
    assert trie.root == {}

    # Adding "ate": a -> t -> e, with only the final letter flagged True.
    after_ate = trie.add("ate")
    assert after_ate == {'a': [False, {'t': [False, {'e': [True, {None}]}]}]}

    # Adding "atom" afterwards branches below "t" into "e" and "o" -> "m".
    after_atom = trie.add("atom")
    expected_branches = {
        'e': [True, {None}],
        'o': [False, {'m': [True, {None}]}],
    }
    assert after_atom == {'a': [False, {'t': [False, expected_branches]}]}
    print("tests passed")
Пример #10
0
 def testAutocomplete_AlmostPrefix_Present_SuggestPartial_2(self):
     """Autocompleting "fores" on the ``prefix2`` words gives the three expected words."""
     trie = Trie.Trie()
     for entry in prefix2:
         trie.insert(entry, trie.root)
     suggested = Counter(trie.autocomplete("fores"))
     wanted = Counter(["foreshadow", "foresight", "foreseeable"])
     self.assertCountEqual(suggested, wanted)
Пример #11
0
 def visit(self,p,alpha,newEventSet,i,k,Q):
     """Grow node ``p`` with the events of ``newEventSet`` and extend ``Q``.

     For every event that no child of ``p`` already carries, a new
     ``Trie(event, k+1)`` child is appended to ``p.children`` and an
     ``Episode`` made of ``alpha.events`` plus that event, with
     ``Window(i, k+1)``, is added to ``Q``. Afterwards ``p.isLO`` is set to
     False when some episode in ``Q`` has the same events (compared as
     strings) as ``alpha``. Extra progress output is printed when
     ``self.debug`` is truthy.
     """
     for event in newEventSet:
         if any(child.event == event for child in p.children):
             if self.debug:
                 print('Already have')
             continue
         p.children.append(Trie(event, k+1))
         if self.debug:
             print("New Trie:")
             print(p)
             print("alpha.events:"+str(alpha.events))
         # Work on a copy so alpha's own event list is left untouched.
         extended_path = alpha.events.copy()
         if self.debug:
             print("qAlphaPath:"+str(extended_path))
         extended_path.append(event)
         Q.add(Episode(extended_path, Window(i, k+1)))
         if self.debug:
             print("New Q:")
             # Prints the whole collection once per element it contains.
             for _ in Q:
                 print(Q)
     alpha_signature = str(alpha.events)
     if any(str(episode.events) == alpha_signature for episode in Q):
         p.isLO = False
     if self.debug:
         print("P:")
         print(p)
def lookup():
    """Prompt for a string and report whether ``TST.lookup`` finds it."""
    query = input("Enter a string for me to lookup: ")
    message = "String found!!!" if TST.lookup(query) else "String NOT found!!!"
    print(message)
Пример #13
0
 def test_complexity(self):
     """The trie lookup timing must be lower than the list lookup timing."""
     seed_tree = Trie.Trie()
     seed_words = []
     queries = create_test_set(100)
     tree, word_list = build(seed_tree, seed_words, 100000)
     elapsed_tree = tree_complexity(tree, queries)
     elapsed_list = list_complexity(word_list, queries)
     self.assertTrue(elapsed_list > elapsed_tree)
Пример #14
0
def suggestions():
    """Return completions for the ``q`` request parameter as JSON.

    A trie is built from every sentence in ``service_data`` and queried with
    the parameter value.

    Returns:
        The string 'null' when ``q`` is missing or empty, otherwise a JSON
        response with the completions under the key ``Completions``.
    """
    # Receiving the parameter from the GET request. ``get`` yields None when
    # ``q`` is absent, so missing and empty values are rejected together.
    queryParams = request.args.get('q')
    if not queryParams:
        return 'null'

    # Creating a full Trie from the corpus received
    trie = Trie()
    for sentence in service_data:
        trie.addSentence(sentence)

    # Getting the list of suggestions by supplying the query
    result = trie.generate_completions(queryParams)

    return jsonify(Completions=result)
def compressao(entrada, saida):
    """Read the text file ``entrada`` and let the trie write to ``saida``.

    Args:
        entrada: path of the input file, read as text.
        saida: path of the output file, opened for binary writing and passed
            to ``Trie.insert`` together with the text and a start index of 0.
    """
    with open(entrada, 'r') as arq_entrada:
        texto = arq_entrada.read()
    trie = Trie.Trie()
    # The context manager closes the output even if insert() raises.
    with open(saida, 'wb') as arq_saida:
        trie.insert(texto, 0, arq_saida)
Пример #16
0
def main():
    """Load the keys of ``dictionary.txt`` into a trie and run ``solution``."""
    words = parse_file('dictionary.txt')
    trie = Trie.Trie()
    for word in words:
        trie.insert(word)
    solution(trie)
Пример #17
0
        def LoadIndex(filename):
            """Load a JSON index file and add every key/value pair to the trie.

            Args:
                filename: path of a UTF-8 JSON file holding an object; each
                    of its keys is passed with its value to
                    ``Trie.AddKeyToTrie``.
            """
            # The context manager closes the file as soon as it is parsed.
            with open(filename, "r", encoding="utf8") as index_file:
                jsonData = json.load(index_file)
            for key, value in jsonData.items():
                Trie.AddKeyToTrie(key, value)
            # One "=" per loaded file, without a trailing newline.
            print("=", sep="", end="")
Пример #18
0
def main():
    """Build the trie, collect at least one random word and print it."""
    root = Trie.TrieNode()  # Create the root node
    root.build()
    os.system("cls")
    print("This is the extra-credit.")
    # Keep looking up until the shared random-word list is non-empty.
    while len(Trie.randomWord) == 0:
        root.randomLookUp()
    chosen = random.choice(Trie.randomWord)
    print()
    root.printRandomBetter(chosen)
Пример #19
0
def get_palindromes(filename, length, function, regex):
    """Search for multi-word palindromes built from a dictionary file.

    Args:
        filename: path of the dictionary file, read line by line.
        length: maximum recursion depth given to the search for each seed.
        function: callback invoked with every completed palindrome string.
        regex: pattern; only lines it matches (``re.match``) are loaded.

    Prints an error and calls ``sys.exit()`` when the file cannot be opened.
    """
    def generate(root, remainder, front, depth):
        # ``root`` is the text built so far, ``remainder`` the letters still
        # unmatched, ``front`` tells on which side the next word is added.
        if depth <= 0:
            return
        if remainder == "":
            function(root)
            return
        if front:
            # [                     root | remainder]
            # [merwen | redniamer + root + remainder]
            candidates = backward.find_from_pref(remainder)
            for danc in candidates:
                newroot = reverse(danc) + reverse(remainder) + " " + root
                generate(newroot, danc, False, depth-1)
            # [      root | remainder   ]
            # [mer + root + rem | ainder]
            for i in range(len(remainder)+1):
                if backward.is_word(remainder[0:i]):
                    newroot = reverse(remainder[0:i]) + " " + root
                    newrem = remainder[i:]
                    generate(newroot, newrem, True, depth-1)
        else:
            # [remainder | root]
            # [remainder + root + redniamer | newrem]
            candidates = forward.find_from_pref(remainder)
            for cand in candidates:
                newroot = root + " " + remainder + cand
                generate(newroot, cand, True, depth-1)
            # [remainder    | root      ]
            # [remain | der + root + red]
            for i in range(1,len(remainder)+1):
                if forward.is_word(remainder[0:i]):
                    newroot = root + " " + remainder[0:i]
                    newrem = remainder[i:]
                    generate(newroot, newrem, False, depth-1)
        return

    try:
        dictionary = open(filename, 'r')
    except (IOError, OSError):
        # Only file-access failures mean "invalid dict file"; a bare except
        # would also swallow KeyboardInterrupt and programming errors.
        # Single-argument print() works on both Python 2 and Python 3.
        print("Error: invalid dict file.")
        sys.exit()

    forward = Trie()
    backward = Trie()
    # ``with`` closes the file even if compiling or inserting raises.
    with dictionary:
        # Compile once instead of once per dictionary line.
        pattern = re.compile(regex)
        for word in dictionary:
            if pattern.match(word):
                forward.insert(word.strip())
                backward.insert(word[::-1].strip())

    for word in forward.find_words():
        # Seed from every palindromic prefix ...
        for i in range(len(word)):
            if is_palindrome(word[0:i]):
                generate(word, word[i:], True, length)
        # ... and from every palindromic suffix.
        for i in reversed(range(len(word))):
            if is_palindrome(word[i:]):
                generate(word, reverse(word[0:i]), False, length)
Пример #20
0
    def test_is_left_of_edge(self):
        """``is_left_of_edge`` answers as expected for two nodes of an 8-bit trie."""
        trie = Trie.Tree(8)
        trie.extend([9, 12, 44, 108, 110, 111])

        def check(node, cases):
            # Each case is (8-bit query value, expected is_left_of_edge answer).
            for bits, expected_left in cases:
                verdict = node.is_left_of_edge(word(bits, 8))
                if expected_left:
                    self.assertTrue(verdict)
                else:
                    self.assertFalse(verdict)

        # Node reached for key 44 (00101100).
        current = trie.root.left.left.right
        self.assertEqual(current.edge, word(0b101100, 6))
        self.assertEqual(current.key, word(0b00101100, 8))
        check(current, (
            (0b00000000, True),   # 00000000 is left of 00101100
            (0b00101011, True),   # 00101011 is left of 00101100
            (0b00101100, False),  # 00101100 is not left of 00101100
            (0b00101111, False),  # 00101111 is not left of 00101100
        ))

        # Node with edge 0-11011-00 on the way to 108.
        current = trie.root.left.right
        self.assertEqual(current.edge, word(0b11011, 5))
        self.assertEqual(current.key, word(0b011011, 6))
        check(current, (
            (0b00000000, True),   # 00000000 is left of 011011
            (0b01101000, True),   # 01101000 is left of 011011
            (0b01101100, False),  # 01101100 is not left of 011011
            (0b01110111, False),  # 01110111 is not left of 011011
        ))
Пример #21
0
def test_insertion():
    """Insert a fixed word sample (with repeats) into a trie and return it."""
    tree = Trie.Trie()
    sample = (
        "monkey", "monkeybusiness", "banana", "monkey", "bananas", "bananas",
        "m",
    )
    for entry in sample:
        tree.insert(entry)
    return tree
Пример #22
0
 def __init__(self, documentPath, dictionaryFilePath):
     """Load the document and fill the dictionary trie.

     ``documentPath`` is handed to ``populateDocumentWithInputText``; every
     word yielded by ``yieldWords(dictionaryFilePath)`` is inserted into
     ``self.dictionaryTrie``.
     """
     # Trie that stores the dictionary words.
     self.dictionaryTrie = Trie.Trie()
     # Placeholder for the document, filled by the call below.
     self.document = None
     self.populateDocumentWithInputText(documentPath)
     for dictionaryWord in self.yieldWords(dictionaryFilePath):
         trie = self.dictionaryTrie
         trie.insert(dictionaryWord, trie.root)
     # Empty rope that will later hold pasted text.
     self.paste_text = ropes.Rope("")
Пример #23
0
 def load_data():
     """Build a trie from the rows of ``csv_data/data.csv``.

     The first row is skipped. From each remaining row the first ' ' cell and
     the first '' cell (if present) are removed; the cells are then joined
     with spaces, stripped, lower-cased and inserted.
     """
     with open('csv_data/data.csv', 'r') as f:
         reader = csv.reader(f, delimiter=',')
         next(reader)  # skip the header row
         rows = list(reader)
     trie = Trie.TrieNode()
     for row in rows:
         for blank in (' ', ''):
             if blank in row:
                 row.remove(blank)
         trie.insert(' '.join(row).strip().lower())
     return trie
Пример #24
0
 def testAutocomplete_LongWord_NotPresent_1(self):
     """"inter" followed by 20 random letters must autocomplete to None."""
     trie = Trie.Trie()
     for entry in prefix1:
         trie.insert(entry, trie.root)
     # Code points 65..122 without 91..96, i.e. A-Z and a-z.
     letter_codes = [code for code in range(65, 123) if not 91 <= code < 97]
     longEnd = "".join(chr(choice(letter_codes)) for _ in range(20))
     self.assertEqual(trie.autocomplete("inter" + longEnd), None)
Пример #25
0
 def testAutocomplete_NotPrefix_Not_Present_5(self):
     """A letter other than the prefix's first letter must autocomplete to None."""
     trie = Trie.Trie()
     for entry in prefix5:
         trie.insert(entry, trie.root)
     # Candidates are the code points 65..122 minus 91..96 and minus both
     # cases of the first letter of the fifth prefix.
     first_letter = prefixes[4][0]
     excluded = list(range(91, 97)) + [ord(first_letter.upper()),
                                       ord(first_letter.lower())]
     candidates = [code for code in range(65, 123) if code not in excluded]
     random_letter = choice(candidates)
     self.assertEqual(trie.autocomplete(chr(random_letter)), None)
Пример #26
0
    def test_total_count(self):
        """``total_count`` is 0 when empty, 3 after three words, and ignores ''."""
        t = Trie.Trie()
        self.assertEqual(t.total_count(), 0)

        for new_word in ('wonder', 'happy', 'beautiful'):
            t.add_word(new_word)
        self.assertEqual(t.total_count(), 3)

        # Adding the empty string must leave the count unchanged.
        t.add_word('')
        self.assertEqual(t.total_count(), 3)
Пример #27
0
def main():
    """Build the trie, collect 100 random words and print them."""
    root = Trie.TrieNode()  # Create the root node
    root.build()

    # Keep looking up until the shared list holds at least 100 words.
    while True:
        if len(Trie.randomWord) >= 100:
            break
        root.randomLookUp()
    os.system("cls")
    banner = "This is Part 1: 100 random words from Alice in Wonderland, please notice that there are no repeat words.\n"
    print(banner)
    root.printRandom()
    print("\n\n")
Пример #28
0
 def __init__(self, path=None):
     """Open the block database and load the id trie from ``./trie``.

     Args:
         path: database file to open; when None, ``BlockChain.sqlite3``
             under ``p.curdir`` is used and stored in ``self.path``.

     When ``./trie`` exists it is read in 11-byte records, each decoded and
     inserted into ``self.trie``; a shorter trailing record is ignored.
     Otherwise the ``Blocks`` table is dropped and an empty ``./trie`` file
     is created. The connection is closed again before returning.
     """
     self.lock = Lock()
     self.path = p.curdir + "/BlockChain.sqlite3" if path is None else path
     self.db = sqlite3.connect(self.path)
     self.trie = Trie.Trie('$')
     if os.path.exists("./trie"):
         # The context manager closes the file even if insert() raises.
         with io.open("./trie", 'rb') as trie_file:
             while True:
                 block_id = trie_file.read(11)
                 if len(block_id) != 11:
                     break
                 self.trie.insert(block_id.decode())
     else:
         self.db.cursor().execute("drop table if exists Blocks")
         # 'xb' creates the file; close the handle at once instead of
         # leaving it open.
         io.open("./trie", 'xb').close()
     self.tip2 = []
     self.init_db()
     # NOTE(review): self.db keeps referring to the closed connection --
     # confirm later code reconnects through self.path.
     self.db.close()
Пример #29
0
def lexicon_from_file(lexicon_filename):
    '''Take a file of words and build a Trie from it.

    Every "qu" in a word is stored as a single "q"; a line that starts with
    "q" followed by anything other than "u" is skipped.

    Note: one word per line. The trailing newline is removed; a final line
    without a newline is kept whole.
    Args: lexicon_filename (str)
    Returns: lexicon (Trie)'''

    lexicon = Trie.Trie()
    qu = re.compile('qu')
    # NOTE(review): match() only tests the start of the line, so a "q"
    # without "u" later in a word is not filtered -- confirm that is intended.
    bare_q = re.compile('q[^u]')

    # The context manager closes the file even if add_word() raises.
    with open(lexicon_filename, 'r') as f:
        for line in f:
            if bare_q.match(line):
                continue
            # rstrip('\n') rather than [:-1]: does not eat the last letter of
            # a final line that has no newline.
            lexicon.add_word(qu.sub('q', line.rstrip('\n')))
    return lexicon
Пример #30
0
def creatreversetree():
    """Build a trie of the reversed words of ``dict.txt``.

    Only lines for which ``duplicate(line) == 1`` are used; each is stripped
    and reversed before insertion.

    Returns:
        The populated ``Trie.Trie``.
    """
    # The context manager closes the file even if duplicate() raises.
    with open("dict.txt", "r") as f1:
        # [::-1] reverses the word. The previous [::1] slice was a plain
        # copy, so the "reverse" tree held the words in forward order.
        dictlist = [
            words.strip()[::-1] for words in f1 if duplicate(words) == 1
        ]
    research = Trie.Trie()
    for reversed_word in dictlist:
        research.insert(reversed_word)
    return research
Пример #31
0
    def construct(self, items):
        """Build the structure from ``items``.

        A ``Trie.Tree`` of width ``self.w`` is filled with ``items`` and its
        root adopted as ``self.root``; the root is registered as a branch
        node, then the hash table is built for the left and the right
        subtree in that order.
        """
        trie = Trie.Tree(self.w)
        trie.extend(items)

        self.root = trie.root

        self._add_branch_node(self.root, self.root)

        # start at next_depth = self.sqrt_log_u, because epsilon was already added
        for side in ("left", "right"):
            self._construct_hash_table(self.root,
                                       getattr(self.root, side),
                                       next_depth=self.sqrt_log_u)
Пример #32
0
 def __init__(self, document, dictionaryPath=None):
     """Set up the piece table for ``document`` and load the dictionary.

     Args:
         document: the original text; a single piece spanning all of it is
             created.
         dictionaryPath: optional path of the word list to load. When
             omitted, the previously hard-coded location is used, so
             existing callers behave as before.
     """
     if dictionaryPath is None:
         # On windows, the dictionary can often be found at:
         # C:/Users/{username}/AppData/Roaming/Microsoft/Spelling/en-US/default.dic
         dictionaryPath = "C:/Users/kirti/AppData/Roaming/Microsoft/Spelling/en-US/default.dic"
     self.textLength = len(document)
     self.original = document
     self.buffer = ""
     self.pieces = [Piece(False, 0, self.textLength)]
     self.dictionary = Trie.Trie()
     with open(dictionaryPath) as input_dictionary:
         for line in input_dictionary:
             for word in line.strip().split(" "):
                 # Blank lines and doubled spaces produce empty tokens; do
                 # not store the empty string as a dictionary word.
                 if word:
                     self.dictionary.insert(word.lower())
     self.pasteText = ""
Пример #33
0
	def __init__(self):
		"""Create and fill the trie, then create the view and hook up its signal."""
		self.trie = Trie()
		self.initTrie()
		view = self.view = TrieDemoView()
		# Every textEntered emission is routed to handleTextChanged.
		view.textEntered.connect(self.handleTextChanged)
Пример #34
0
# Ben Reynolds 13309656
# Task2  (Test) - Assignment 1
from LexicalAnalyzier import *
from Trie import *

# Run the lexer driver (the instance replaces the class under the same name).
Lexer = Lexer()
Lexer.driver()
# To dump the lexer's trie as well, call Lexer.printTrie() here.


# Feed a fixed list of words to a fresh trie, in this order.
trie = Trie()
for _keyword in (
        "private",
        "public",
        "protected",
        "static",
        "primary",
        "integer",
        "exception",
        "try",
):
    trie.proccessWord(_keyword)