示例#1
0
    def __init__(self):
        """Create the main window, load the dictionary and run the Tk loop.

        The call only returns once the window has been closed, because
        ``mainloop()`` is entered as the last step.
        """
        # Top-level window.
        self.win = Tk()
        self.win.geometry('1500x500')
        root = self.win

        # Lookup state; load_data() fills self.d and self.trie.
        self.result = None
        self.trie = TrieNode()
        self.d = {}
        self.popped = []
        self.curr_text = ""
        self.load_data()

        # Widgets.
        self.lbl1 = Label(root, text='ENTER WORD')
        self.lbl3 = Label(root, text='Result')
        self.t1 = Entry(bd=3)
        self.t3 = Entry(width=100)
        self.b1 = Button(root, text='SEARCH', command=self.search)
        self.b2 = Button(root, text='Next suggestion', command=self.next_sugg)

        # Absolute layout: (widget, x, y).
        layout = (
            (self.lbl1, 100, 50),
            (self.t1, 200, 50),
            (self.b1, 100, 150),
            (self.b2, 100, 350),
            (self.lbl3, 100, 200),
            (self.t3, 200, 200),
        )
        for widget, x_pos, y_pos in layout:
            widget.place(x=x_pos, y=y_pos)

        root.mainloop()
示例#2
0
 def test_node_set_child(self):
     """
     TrieNode.set_child should create an empty TrieNode in the slot that
     corresponds to each lowercase letter.
     """
     parent = TrieNode()
     for slot, char in enumerate(string.ascii_lowercase):
         parent.set_child(char)
         # The slot for this letter must now hold a fresh, empty node.
         self.assertIsNotNone(parent.children[slot])
         self.assertIsInstance(parent.children[slot], TrieNode)
         self.assertTrue(parent.children[slot].empty())
示例#3
0
def trie_search_surname(surname):
    """Look up *surname* in a trie built from the authors' last names.

    The books are loaded with ``load_books(file)``; the first row is skipped
    (presumably a header row -- confirm against the data file). For each
    remaining row the last space-separated token of column 3 is taken as the
    author's surname.

    Returns whatever ``TrieNode.find`` returns for *surname*.
    """
    book_list = load_books(file)
    surnames = [book[3].split(" ")[-1] for book in book_list[1:]]
    root = TrieNode()
    root.insert_many(surnames)
    # The original searched for `req_title`, a name that does not exist in
    # this function; the value to search for is the `surname` argument.
    return root.find(surname)
示例#4
0
def trie_search_id(req_title):
    """Look up *req_title* in a trie built from the book titles.

    The books come from ``load_books(file)``; the first row is skipped. A
    title is the concatenation (no separator) of every field of a row except
    the first two and the last two.

    Returns whatever ``TrieNode.find`` returns for *req_title*.
    """
    rows = load_books(file)
    titles = ["".join(row[2:-2]) for row in rows[1:]]
    trie = TrieNode()
    trie.insert_many(titles)
    return trie.find(req_title)
示例#5
0
    def test_node_get_index(self):
        """
        TrieNode._get_index should map a letter to its alphabet position,
        for lowercase and uppercase letters alike.
        """
        # Lowercase first, then uppercase: both must yield positions 0..25.
        for alphabet in (string.ascii_lowercase, string.ascii_uppercase):
            for expected, char in enumerate(alphabet):
                self.assertEqual(expected, TrieNode._get_index(char))
示例#6
0
 def test_initialize_trie_node_object_with_alphabet_length(self):
     """
     A node built with an explicit alphabet length should start with no
     value, that many empty child slots, and no end-of-word mark.
     """
     length = 4
     trie_node = TrieNode('a', length)

     self.assertEqual(trie_node.character, 'a')
     self.assertIsNone(trie_node.value)
     self.assertEqual(trie_node.alphabet_length, length)
     self.assertEqual(trie_node.children, [None] * length)
     self.assertFalse(trie_node.is_end_of_word)
示例#7
0
class Model(object):
    """Word collection stored in a trie, with two switchable search modes."""

    def __init__(self):
        """Start with an empty trie and the ``find`` search mode selected."""
        self.root = TrieNode()
        # 1 -> List() uses root.find; 0 -> List() uses root.findInfix.
        self.PrefixOnly = 1

    def Construct(self, filename):
        """Add every whitespace-stripped line of *filename* to the trie.

        The handle returned by ``LoadFile`` is closed even when reading or
        inserting fails part-way through, so it cannot leak.
        """
        buf = LoadFile(filename)
        try:
            for line in buf:
                self.root.add(line.strip())
        finally:
            buf.close()

    def List(self, word):
        """Return the list of matches for *word* in the current search mode.

        The trie fills the list in place; which trie method is used depends
        on ``PrefixOnly`` (see ``SwitchCommand``).
        """
        container = []
        if self.PrefixOnly == 1:
            self.root.find(word, container)
        else:
            self.root.findInfix(word, container)
        return container

    def Contains(self, word):
        """Return ``root.contains`` for *word* with surrounding whitespace removed."""
        return self.root.contains(word.strip())

    def SwitchCommand(self):
        """Toggle ``PrefixOnly`` between 1 and 0."""
        self.PrefixOnly = 1 ^ self.PrefixOnly
示例#8
0
 def insert(self, word, cur_candidate):
     """
     Add *word* to the trie, creating nodes for missing letters, then mark
     the final node as a word end and attach *cur_candidate* to it.
     """
     node = self.root
     for char in word:
         branches = node.children
         if char not in branches:
             branches[char] = TrieNode()
         node = branches[char]
     # `node` is now the node for the last letter (or the root for "").
     node.isEnd = True
     node.candidate = cur_candidate
示例#9
0
    def test_node_empty(self):
        """
        TrieNode.empty should be true for a fresh node and false once a
        child slot is occupied.
        """
        parent = TrieNode()
        # No children yet.
        self.assertTrue(parent.empty())

        # Occupy the first slot; the node must no longer report empty.
        parent.children[0] = TrieNode()
        self.assertFalse(parent.empty())
示例#10
0
    def test_node_delete_child(self):
        """
        TrieNode.delete_child should clear the slot of the given letter;
        after deleting every letter the node must be empty.
        """
        # Start from a node whose every lowercase slot is occupied.
        parent = TrieNode()
        parent.children = [TrieNode() for _ in string.ascii_lowercase]

        # Delete letter by letter, checking the slot before and after.
        for slot, char in enumerate(string.ascii_lowercase):
            self.assertIsNotNone(parent.children[slot])
            parent.delete_child(char)
            self.assertIsNone(parent.children[slot])

        self.assertTrue(parent.empty())
示例#11
0
    def test_node_get_child(self):
        """
        TrieNode.get_child should return None for missing children and the
        exact stored node for existing ones.
        """
        letters = string.ascii_lowercase

        # A fresh node has no child for any letter.
        parent = TrieNode()
        for char in letters:
            self.assertIsNone(parent.get_child(char))

        # Fill every slot, then each letter must resolve to its own slot.
        parent.children = [TrieNode() for _ in letters]
        for slot, char in enumerate(letters):
            self.assertIs(parent.get_child(char), parent.children[slot])
示例#12
0
    # Sort ascending, then reverse, leaving exchan_p in descending order.
    exchan_p.sort()
    exchan_p.reverse()
    print "sort! finished!"

    limit_n = int(math.log(len(k_word))) * 2  # take the first n entries as keywords
    print "%d" % limit_n
    # Output the word-frequency statistics.
    for i in range(limit_n):
        #print"%s\t%s"%(exchan_p[i][1],str(exchan_p[i][0]))
        result.append(exchan_p[i][1])
    for i in result:
        print i
    print "rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr"

    # Reduce every SMS message to its keywords and put it into the trie.
    rootNode = TrieNode("")  # root node
    dir_file.seek(0)  # rewind to the start of the file to walk the messages again
    mess = dir_file.readline()
    cnt = 0
    # NOTE(review): the visible part of this loop never advances `mess` or
    # `cnt`; presumably that happens in lines past the end of this fragment.
    while mess:
        mes_deal = []
        seg_list = jieba.cut(mess, cut_all=False)
        for seg_word in seg_list:  # pick out the keywords
            if result.count(seg_word) == 1:
                mes_deal.append(seg_word)
        # Turn "u'" into "'" in the list's string form before printing it.
        s = str(mes_deal).replace('u\'', '\'')
        #print "aaaaaaaaaaaaaaaaaaaaaaaaaa"
        if cnt % 10000 == 0:
            print "sentenses%d   %s" % (cnt, s.decode("unicode-escape"))
        insertMsg(rootNode, mes_deal, cnt)  # insert into the trie
        del mes_deal[:]
示例#13
0
class MyWindow:
    """Tkinter front-end for a dictionary lookup with "did you mean" paging.

    The window has a query entry, a result entry, a SEARCH button and a
    "Next suggestion" button that pages through the candidates of the last
    search, three at a time.
    """

    def __init__(self):
        """Build the window, load the dictionary and run the Tk main loop.

        The call only returns once the window has been closed, because
        ``mainloop()`` is entered as the last step.
        """
        self.win = Tk()
        self.win.geometry('1500x500')

        self.result = None    # last value returned by find()
        self.trie = TrieNode()
        self.d = {}           # word -> definition text
        self.popped = []      # suggestions already shown for the current query
        self.curr_text = ""   # query text of the last search

        self.load_data()
        self.lbl1 = Label(self.win, text='ENTER WORD')
        self.lbl3 = Label(self.win, text='Result')
        self.t1 = Entry(bd=3)
        self.t3 = Entry(width=100)
        self.lbl1.place(x=100, y=50)
        self.t1.place(x=200, y=50)
        self.b1 = Button(self.win, text='SEARCH', command=self.search)
        self.b1.place(x=100, y=150)
        self.b2 = Button(self.win,
                         text='Next suggestion',
                         command=self.next_sugg)
        self.b2.place(x=100, y=350)
        self.lbl3.place(x=100, y=200)
        self.t3.place(x=200, y=200)

        self.win.mainloop()

    def _pop_suggestions(self, count=3):
        """Pop up to *count* entries from the result heap and format them.

        Popped entries are remembered in ``self.popped``. Once the heap is
        exhausted, the remembered entries are turned back into the heap so
        that paging starts over from the beginning.

        Returns the display string, e.g. ``"Did you mean : foo bar baz "``.
        """
        if not self.result and self.popped:
            # heapify() works in place and returns None, so it must not be
            # assigned; swap the re-heapified list in instead.
            hq.heapify(self.popped)
            self.result, self.popped = self.popped, []

        words = []
        # Never pop more than is available: heappop on an empty heap raises.
        for _ in range(min(count, len(self.result))):
            entry = hq.heappop(self.result)
            self.popped.append(entry)
            words.append(entry[1])
        return "Did you mean : " + "".join(word + " " for word in words)

    def search(self):
        """Look up the text of the query entry and show the outcome."""
        self.t3.delete(0, 'end')
        word = self.t1.get()
        self.curr_text = word
        # Suggestions shown for an earlier query must not be recycled into
        # the paging of this one.
        self.popped = []
        self.result = self.find(word)
        if isinstance(self.result, list):
            tmp = self._pop_suggestions()
        else:
            tmp = self.result
        self.t3.insert(END, tmp)

    def next_sugg(self):
        """Show the next page of suggestions for the last search.

        Asks the user to search first when the query text has changed since
        the last search or when there is no result yet. When the last search
        produced a definition instead of suggestions, it is shown again.
        """
        self.t3.delete(0, 'end')
        if self.t1.get() != self.curr_text or self.result is None:
            self.t3.insert(END, "press search first....!!!!")
        elif isinstance(self.result, list):
            self.t3.insert(END, self._pop_suggestions())
        else:
            # Exact match: there is nothing to page through.
            self.t3.insert(END, self.result)

    def load_data(self):
        """Read ``dictionary.txt`` into ``self.d`` and index its keys in the trie.

        Each usable line is split on whitespace: the first token, reduced to
        its ASCII letters, is the key and the remaining tokens form the
        definition. Lines of three characters or fewer (which covers blank
        lines) and lines without a usable key are skipped.
        """
        with open("dictionary.txt") as f:
            for line in f:
                if len(line) <= 3:
                    continue
                words = line.split()
                if not words:
                    # Whitespace-only line longer than three characters.
                    continue
                key = re.sub(r'[^a-zA-Z]', '', words[0])
                if not key:
                    # First token contained no letters at all.
                    continue
                self.d[key] = " ".join(words[1:])

        for word in self.d:
            self.trie.insert(word)

    def find(self, s):
        """Return the definition of *s*, or the outcome of the fuzzy search.

        *s* is capitalized before the dictionary lookup. On a miss the
        module-level ``search(s, 5, self.trie)`` is consulted.
        NOTE(review): the meaning of the ``5`` argument is not visible here;
        ``search`` is presumably expected to return a heap-ordered list of
        tuples whose second item is the suggested word -- confirm.
        """
        s = s.capitalize()
        if s in self.d:
            return self.d[s]
        return search(s, 5, self.trie)
示例#14
0
from Trie import TrieNode

# Build a small trie rooted at "J" that spells J-O-H-N-A-T-H-A-N.
# The nodes for "O", the first "N" and the last "N" get `True` as the second
# add_child argument (presumably the end-of-word flag -- see
# print_trie/is_end).
T = TrieNode("J")

T.add_child("O", True)
_cursor = T.child("O")

_cursor.add_child("H")
_cursor = _cursor.child("H")

_cursor.add_child("N", True)
_cursor = _cursor.child("N")

# The unflagged middle section: A, T, H, A.
for _letter in "ATHA":
    _cursor.add_child(_letter)
    _cursor = _cursor.child(_letter)

_cursor.add_child("N", True)


def print_trie(prefix, t):
    """Print every flagged word in the subtree of *t*, depth first.

    *prefix* holds the characters on the path above *t*; the node's own
    character is appended to it. A line is printed for each node whose
    ``is_end()`` is true.
    """
    word = prefix + t.character
    if t.is_end():
        print(word)

    for key in t.get_children():
        print_trie(word, t.child(key))
示例#15
0
 def __init__(self):
     """Start in mode 1 (``PrefixOnly``) with a freshly created trie root."""
     # Mode flag first, then the (independent) root node.
     self.PrefixOnly = 1
     self.root = TrieNode()
示例#16
0
import pickle

import pandas as pd
from flask import Flask, jsonify, request
from sklearn.feature_extraction.text import TfidfVectorizer

from MajorProcessing import makeTokens, bad_word_exists, detect_content_type, get_profanity_probability, \
    _get_malicious_url_probability
from Trie import TrieNode

# Flask application object; the route decorators below register on it.
app = Flask(__name__)
# Root of the bad-words trie handed to bad_word_exists(); created with '*'
# as its constructor argument.
badwords_trie_root: TrieNode = TrieNode('*')
# TF-IDF vectorizer that tokenizes with the project's makeTokens function.
vectorizer = TfidfVectorizer(tokenizer=makeTokens)


@app.route('/')
def home():
    """Return a fixed status string showing that the server is up."""
    return 'Encrypted QR Codes Server is Running'


@app.route('/check_bad_words', methods=['POST'])
def check_bad_words():
    """Check the posted sentence against the bad-words trie.

    Expects a JSON object with the keys ``secret`` and ``sentence``.

    Returns:
        400 with a "check your params" payload when the body is not a JSON
        object, a key is missing, or the secret does not match; otherwise
        201 with the value of ``bad_word_exists`` under ``data``.
    """
    params = request.get_json()
    # NOTE(review): the shared secret is hard-coded in source; it should be
    # moved to configuration.
    params_ok = (
        isinstance(params, dict)
        and params.get('secret') == 'its_very_hard'
        and 'sentence' in params
    )
    if not params_ok:
        # Previously a wrong secret left `sentence` unbound and the request
        # ended in an unhandled error instead of this response.
        return jsonify({'message': 'check your params', 'data': {}, 'success': False}), 400
    resultant = bad_word_exists(badwords_trie_root, params['sentence'])
    return jsonify({'message': 'successfully checked bad words', 'success': True, 'data': resultant}), 201
示例#17
0
 def __init__(self):
     """Create the trie root and the word-to-candidate mapping."""
     # Root node of the trie.
     self.root = TrieNode()
     # Words map to Candidate objects; a missing word gets a new Candidate.
     self.candidate_dict = defaultdict(Candidate)
示例#18
0
 def test_raise_exception_if_alphabet_length_less_than_zero(self):
     """Constructing a TrieNode with a negative alphabet length must raise ValueError."""
     self.assertRaises(ValueError, TrieNode, 'a', -1)