def setup_method(self, method):
    """Build a fresh trie fixture holding five sample words.

    ``method`` is not used by this fixture.
    """
    self.mytrie = Trie('')
    for sample in ('cat', 'cab', 'dog', 'what', 'whatsup'):
        self.mytrie.add(sample)
def generate_table_trie(loc):
    """Build a trie from a folder of pre-formatted tables.

    Every location produced by ``generate_loc(loc)`` is wrapped as
    ``Table(RawTable(location))`` and inserted into the trie under its
    ``table_label``, so tables sit at the leaves and label characters
    form the inner nodes.

    Returns the generated trie.
    """
    print("Generating Table Trie...\n")
    # generate_loc hands out one source-table location per step
    locations = generate_loc(loc)
    trie = Trie()
    # Build every table object first, then insert their labels
    tables = [Table(RawTable(location)) for location in locations]
    for table in tables:
        insert(table.table_label, table, trie.root)
    # Fill the trie's access dictionary (and related structures) with the
    # strings that belong to it
    trie.yield_strings(trie.root)
    print("Generating Reverse Table Trie...\n")
    # The reverse trie supports suffix search
    generate_reverse_trie(trie)
    return trie
class Dictionary:
    """Word lookup backed by a trie, with similar-word search via a BK-tree."""

    def __init__(self, dict_map):
        """Index every entry of ``dict_map``.

        ``dict_map`` maps each word to its translation, e.g.
        ``dict_map['english'] = 'chinese'``.
        """
        self.dict_trie = Trie(dict_map)
        self.similar_tree = BK_Tree()
        for word in dict_map:
            self.similar_tree.insert(word)
        self.dict_map = dict_map

    def search(self, key):
        """Look ``key`` up in the trie and return whatever it yields."""
        return self.dict_trie.search(key)

    def getSimilarWord(self, word, num=5, maxDistance=5):
        """Return translation records for the words most similar to ``word``.

        Each record is a dict with the keys ``word``, ``translation`` and
        ``distance``, built from the entries that
        ``similar_tree.topKSimilar(word, num, maxDistance)`` returns.
        """
        assert word is not None
        matches = self.similar_tree.topKSimilar(word, num, maxDistance)
        return [
            {
                "word": match[0],
                "translation": self.dict_trie.search(match[0])[match[0]],
                "distance": match[1],
            }
            for match in matches
        ]
def setUp(self):
    """Create an empty trie and the sample word list used by the tests."""
    self.trie = Trie()
    self.case = [
        "A",
        "a",
        "aa",
        "aal",
        "aalii",
        "aam",
        "Aani",
        "aardvark",
        "aardwolf",
        "Aaron",
        "Aaronic",
        "Aaronite",
        "Aaronitic",
        "Aaru",
        "Ab",
        "Ababdeh",
        "Ababua",
        "abac",
        "abacay",
        "abacinate",
    ]
def getAutoComplete(query):
    """Return the ``jsonify`` payload holding suggestions for ``query``.

    A trie is built from ``filtered_words`` on every call and asked for
    its suggestions; the payload has the keys ``query`` and ``results``.
    """
    suggestions = Trie(filtered_words).suggestions(query)
    return jsonify({'query': query, 'results': suggestions})
class TestTrie(unittest.TestCase):
    """Membership and prefix tests for ``Trie``."""

    def setUp(self):
        """Create an empty trie and the sample word list."""
        self.trie = Trie()
        self.case = [
            "A", "a", "aa", "aal", "aalii", "aam", "Aani", "aardvark",
            "aardwolf", "Aaron", "Aaronic", "Aaronite", "Aaronitic", "Aaru",
            "Ab", "Ababdeh", "Ababua", "abac", "abacay", "abacinate",
        ]

    def testAddContains(self):
        """Added words are contained in the trie; the remaining ones are not."""
        remaining = set(self.case)
        # A quarter of the distinct words (half of a half) is moved out
        # of the pool and added to the trie.
        quarter = (len(remaining) // 2) // 2
        added = {remaining.pop() for _ in range(quarter)}
        for word in added:
            self.trie.add(word)
        for word in added:
            self.assertIn(word, self.trie)
        for word in remaining:
            self.assertNotIn(word, self.trie)

    def testContainsPrefix(self):
        """Prefixes of stored words are found; unrelated words are not."""
        prefixes = {"A", "a", "aa", "aal", "Aaron", "Ab", "aba", "abac"}
        others = [
            "abaciscus",
            "abacist",
            "aback",
            "abactinal",
            "Abe",
            "abaction",
        ]
        # Only the words that are not themselves in `prefixes` get stored.
        for word in set(self.case) - prefixes:
            self.trie.add(word)
        for prefix in prefixes:
            self.assertTrue(self.trie.containsPrefix(prefix))
        for word in others:
            self.assertFalse(self.trie.containsPrefix(word))
def test_search(self):
    """Trie.search returns the number of times a word was added."""
    trie = Trie()
    words = "on the banks of red cedar theres school thats known to all".split()
    for word in words:
        trie.add(word)

    # Every stored word is counted once; its reversal was never added.
    for word in words:
        self.assertEqual(trie.search(word), 1)
        self.assertEqual(trie.search(word[::-1]), 0)

    # Re-add the first half and make sure those counts go up to two.
    duplicates = words[:len(words) // 2]
    for word in duplicates:
        trie.add(word)

    for word in words:
        expected = 2 if word in duplicates else 1
        self.assertEqual(trie.search(word), expected)
        self.assertEqual(trie.search(word[::-1]), 0)
def suggestedProducts(self, products, searchWord):
    """Collect suggestions for every prefix of ``searchWord``.

    All ``products`` are inserted into a trie. The trie is then walked one
    character of ``searchWord`` at a time, and ``self.dfs`` is asked for the
    suggestions below the node reached so far.

    :param products: iterable of product names to index.
    :param searchWord: the word being typed.
    :return: a list with one entry per character of ``searchWord``; each
        entry is whatever ``self.dfs`` returns for that prefix, or ``[]``
        once the prefix is no longer present in the trie.
    """
    trie = Trie()
    for word in products:
        trie.insert(word)

    result = []
    p_crawl = trie.root
    prefix = ""
    for s in searchWord:
        prefix += s
        # Only descend while we are still on the trie. Without this guard
        # the first missing child leaves p_crawl empty and the following
        # character would dereference it and crash.
        if p_crawl:
            p_crawl = p_crawl.children[trie.getIndex(s)]
        result.append(self.dfs(p_crawl, prefix) if p_crawl else [])
    return result
def longestWord(self, words):
    """Return the longest string reachable through end-of-word nodes only.

    Every word is inserted into a trie, then the trie is explored depth
    first with an explicit stack. A child is followed only when it is
    flagged ``isEndNode``, so each step of a candidate is itself the end
    of an inserted word.

    :param words: iterable of lowercase words.
    :return: the longest such string, or ``""`` when there is none.
    """
    trie = Trie()
    for word in words:
        trie.insert(word)

    best = ""
    pending = [(trie.root, "")]
    while pending:
        node, prefix = pending.pop()
        if len(prefix) > len(best):
            best = prefix
        # Push 'z' first and 'a' last so 'a' is popped first.
        for idx in reversed(range(26)):
            child = node.children[idx]
            if child and child.isEndNode:
                pending.append((child, prefix + chr(97 + idx)))
    return best
class Dictionary:
    """Trie-backed word lookup with BK-tree similar-word search."""

    def __init__(self, dict_map):
        """Index ``dict_map``, a mapping such as ``dict_map['english'] = 'chinese'``."""
        self.dict_trie = Trie(dict_map)
        self.similar_tree = BK_Tree()
        for entry in dict_map:
            self.similar_tree.insert(entry)
        self.dict_map = dict_map

    def search(self, key):
        """Return the trie's search result for ``key``."""
        return self.dict_trie.search(key)

    def getSimilarWord(self, word, num=5, maxDistance=5):
        """Return ``word``/``translation``/``distance`` records for similar words.

        The candidates come from
        ``similar_tree.topKSimilar(word, num, maxDistance)``; each one is
        looked up in the trie again to fetch its translation.
        """
        assert word is not None
        trans = []
        for hit in self.similar_tree.topKSimilar(word, num, maxDistance):
            similar_word = hit[0]
            lookup = self.dict_trie.search(similar_word)
            trans.append({
                "word": similar_word,
                "translation": lookup[similar_word],
                "distance": hit[1],
            })
        return trans
def main():
    """Load a word list into a trie and run the interactive word-search game.

    The word file is taken from ``sys.argv[1]``, one word per line. Each
    round asks for grid dimensions, fills the grid with random lowercase
    letters, prints it together with the words found and the elapsed time.
    Any command other than ``1`` exits.
    """
    words_dictionary = Trie()
    file_name = sys.argv[1]
    lowercase_letters = string.ascii_lowercase

    try:
        with open(file_name, 'r') as file_handle:
            for word in file_handle:
                words_dictionary.insert_word(word.rstrip())
    except FileNotFoundError:
        print("The file name provided doesn't exist. Try again!")
        sys.exit()

    word_search_game = WordSearch(words_dictionary)
    while True:
        cmd = int(input("Enter 1 to play else any other digit to exit! : "))
        if cmd != 1:
            sys.exit()

        num_rows = int(input("Please enter number of rows for the grid : "))
        num_cols = int(input("Please enter number of columns for the grid : "))
        start = time.time()
        grid = [
            [random.choice(lowercase_letters) for _ in range(num_cols)]
            for _ in range(num_rows)
        ]
        print('\n')
        for row in grid:
            print(row)
        print(word_search_game.search_valid_words(grid))
        print("\nTotal execution time : ", time.time() - start, "\n")
class Spell_Checker():
    """Flags words of a sentence that never occurred in a reference corpus."""

    def __init__(self, corpus_list):
        """Add every word of every sentence in ``corpus_list`` to the trie."""
        self.word_trie = Trie()
        for sentense in corpus_list:
            for valid_word in self.text_to_words(sentense):
                self.word_trie.add(valid_word)

    def text_to_words(self, text):
        """Split ``text`` into words.

        Matches of ``rep_special_chars`` are replaced by a space and matches
        of ``rep_numbers`` are removed before splitting on whitespace.
        """
        without_specials = rep_special_chars.sub(' ', text)
        without_numbers = rep_numbers.sub('', without_specials)
        return without_numbers.split()

    def check(self, sentence):
        """Print the words of ``sentence`` that are missing from the trie."""
        list_to_check = [
            w for w in self.text_to_words(sentence)
            if not self.word_trie.exists(w)
        ]
        if not list_to_check:
            print('No spelling errors found')
            return
        print('Check the spelling of the following words:')
        for w in list_to_check:
            print(w)

    def read_check(self):
        """Keep prompting for sentences to check until the answer is not ``y``."""
        while input('Would you like to check a sentense: (y/n)') == 'y':
            input_sentense = input('Introduce the sentense you would like to check:')
            self.check(input_sentense)
def findMultiplePatternMatches(seq, patterns):
    """Return the start indices in ``seq`` at which the pattern trie matches.

    A trie is built from ``patterns``; every start index from 0 up to
    ``len(seq) - len(longest pattern)`` is kept when
    ``trie.prefix_in_trie(seq[i:], 1)`` is exactly ``True``.
    """
    trie = Trie(patterns)
    longest = max(map(len, patterns))
    indicies = []
    for start in xrange(len(seq) - longest + 1):
        if trie.prefix_in_trie(seq[start:], 1) is True:
            indicies.append(start)
    return indicies
def autocomplete(prefix: str, possible_queries: List[str]) -> List[str]:
    """Return the completions of ``prefix`` among ``possible_queries``.

    All queries are inserted into a trie, the sub-structure for ``prefix``
    is looked up with ``trie.find`` and handed to ``complete_words``.
    """
    trie = Trie()
    for query in possible_queries:
        trie.insert(query)
    return complete_words(prefix, trie.find(prefix))
def trie_soln():
    """Prompt for a file name, build the trie from it and print each entry.

    Every element returned by ``create_trie`` is printed on its own line
    with its items converted to strings and separated by spaces.
    """
    trie_methods = Trie()
    print("please enter filename")
    filename = read_console()
    strings = trie_methods.read_parse_file(filename)
    for elt in trie_methods.create_trie(strings, 1, 1):
        print(" ".join([str(i) for i in elt]))
def test_insert_trie_one(self):
    """A single inserted sequence is the only thing ``collect`` returns."""
    trie = Trie()
    trie.insert([1, 2, 3])
    collection = trie.collect([])
    print(collection)
    self.assertEqual([[1, 2, 3]], collection)
def __init__(self, pinyins):
    """Store ``pinyins`` and load the list of valid pinyin into a trie.

    The first whitespace-separated field of every line of
    ``pinyin/pinyin_list.txt`` is inserted into ``self.tree``.

    :param pinyins: stored unchanged on the instance.
    """
    self.pinyins = pinyins
    # Read in all valid pinyin.
    self.tree = Trie()
    # `with` guarantees the file is closed even if an insert raises.
    with open('pinyin/pinyin_list.txt') as f:
        for line in f:
            fields = line.split()
            # A blank line (e.g. a trailing newline) has no first field.
            if fields:
                self.tree.insert(fields[0])
def __init__(self, dict_map):
    """Index ``dict_map`` in a trie and its keys in a BK-tree.

    ``dict_map`` maps each word to its translation, e.g.
    ``dict_map['english'] = 'chinese'``.
    """
    self.dict_map = dict_map
    self.dict_trie = Trie(dict_map)
    self.similar_tree = BK_Tree()
    for word in dict_map:
        self.similar_tree.insert(word)
def word_search_ii(grid, words):
    """Return the set of ``words`` found in ``grid``.

    The words are stored in a trie created over the characters ``a``-``z``;
    ``dfs_with_trie`` is then started from every cell and records its
    findings in the returned set.
    """
    dictionary = Trie(map(chr, xrange(97, 123)))
    for word in words:
        dictionary.add(word)

    found_words = set()
    for r in xrange(len(grid)):
        for c in xrange(len(grid[0])):
            dfs_with_trie(r, c, grid, dictionary.root, found_words)
    return found_words
def __init__(self, regex=r"[\w]+"):
    """Load the word list and index it.

    ``self.WORDS`` counts how often each token of the ``english_words``
    file occurs; every distinct word is also inserted into ``self.trie``.

    :param regex: stored as ``self.regex``.
    """
    self.trie = Trie()
    self.regex = regex
    with open(english_words, "r") as f:
        # Map every word to its number of occurrences
        self.WORDS = Counter(self.words_token(f.read()))
    # Put all the words in the trie
    for word in self.WORDS:
        self.trie.insert(word)
def test_prefix_trie_matching_pattern_not_found(self):
    """No pattern is a prefix of the text, so nothing is matched."""
    text = 'CGCAGTAACA'
    trie = Trie(4)
    for pattern in ('ATC', 'CAT', 'CGTA'):
        trie.insert_key(pattern, len(pattern))
    self.assertEqual([], prefix_trie_matching(text, trie))
def test_prefix_trie_matching(self):
    """Every pattern that is a prefix of the text is reported, shortest first."""
    text = 'CGCAGTAACA'
    trie = Trie(4)
    for pattern in ('CGCA', 'CGCAGT', 'CGC', 'C'):
        trie.insert_key(pattern, len(pattern))
    self.assertEqual(
        ['C', 'CGC', 'CGCA', 'CGCAGT'],
        prefix_trie_matching(text, trie),
    )
def test_splitWord(self):
    """``_splitWord`` breaks a word into its individual characters."""
    # given
    expected = ["w", "o", "r", "d"]
    trie = Trie()
    # when
    actual = trie._splitWord("word")
    # then
    assert actual == expected
def load_trie():
    """Build a trie from the lines of ``成语俗语.txt`` and return it.

    Each line is stripped of surrounding whitespace before insertion; the
    number of lines read is printed.
    """
    trie = Trie()
    count = 0
    with open("成语俗语.txt", encoding='utf-8') as f:
        for count, line in enumerate(f, 1):
            trie.insert(line.strip())
    print("word num:", count)
    return trie
def test_searching(self):
    """A longer word is only found after it has been added itself."""
    short, extended = "semir", "semiramida"
    trie = Trie()
    trie.add_key(short)
    self.assertTrue(trie.in_tree(short))
    self.assertFalse(trie.in_tree(extended))
    trie.add_key(extended)
    self.assertTrue(trie.in_tree(extended))
def test_len_contains_empty(self):
    """Exercise ``len(trie)``, ``in trie`` and ``trie.empty()``."""
    trie = Trie()
    self.assertTrue(trie.empty())

    words = "on the banks of red cedar theres school thats known to all".split()

    def assert_membership():
        # Stored words are members; their reversals were never added.
        for word in words:
            self.assertTrue(word in trie)
            self.assertFalse(word[::-1] in trie)

    for word in words:
        trie.add(word)
    self.assertEqual(len(trie), len(words))
    self.assertFalse(trie.empty())
    assert_membership()

    # Adding duplicates is expected to grow len() by one per addition.
    duplicates = words[:len(words) // 2]
    for word in duplicates:
        trie.add(word)
    self.assertEqual(len(trie), len(words) + len(duplicates))
    self.assertFalse(trie.empty())
    assert_membership()
def __init__(self, filename):
    """Load ``filename`` and build the lookup structures.

    The file is loaded first, then the trie and the ``store`` mapping are
    created and ``build_Tree`` is run. ``Person._registry`` is reset to an
    empty list at the end.

    @input: string filename
    @return None
    """
    # The statement order is kept exactly as written; the helper methods
    # are not visible here, so their dependencies are not known.
    self.loadFromFile(filename)
    self.tree = Trie()
    self.store = defaultdict(list)
    self.build_Tree()
    Person._registry = []
def corpus2table(data_path, table_path=None, lang=None):
    """Build a prefix-by-suffix count table from a text corpus.

    Every token of the corpus is cleaned with ``non_word_pattern``,
    lower-cased, terminated with ``#`` and inserted into a trie. The
    trie's prefix/suffix tree is turned into a DataFrame (rows: prefixes,
    columns: the 300 most common suffixes). Only the columns whose
    ``entropy`` lies above the 0.9 quantile are kept.

    :param data_path: UTF-8 text file to read.
    :param table_path: when given, the resulting table is written there as CSV.
    :param lang: when given, a scatter plot of entropies against log
        frequencies is saved under a name containing ``lang``.
    :return: the filtered DataFrame.
    """
    trie = Trie()
    with open(data_path, 'r', encoding='utf-8') as inp:
        for line in inp:
            for token in word_tokenize(line):
                cleaned = non_word_pattern.sub('', token)
                if cleaned:
                    trie.insert(f'{cleaned.lower()}#')
    prefix_suffix_tree = trie.get_prefix_suffix_tree()
    print('Tree constructed')

    prefixes = sorted(prefix_suffix_tree.keys())
    suffix_counts = Counter()
    for per_prefix in prefix_suffix_tree.values():
        for suffix, count in per_prefix.items():
            suffix_counts[suffix] += count

    # Take N most common suffixes
    top = suffix_counts.most_common(300)
    suffixes = [suffix for suffix, _ in top]
    freqs = [count for _, count in top]

    d = pd.DataFrame(index=prefixes, columns=suffixes, dtype=int).fillna(0)
    for prefix, per_prefix in prefix_suffix_tree.items():
        print(prefix)
        for suffix, count in per_prefix.items():
            if suffix in d.columns:
                d.loc[prefix, suffix] = count
    print('Dataframe constructed')

    entropies = d.apply(entropy)
    if lang is not None:
        # Regress entropies on log frequencies
        plt.figure(figsize=(16, 10))
        plt.scatter(np.log(freqs), entropies, marker='o')
        plt.savefig(f'/home/macleginn/Analyses/bible-tables/img/entropies_log_freqs_{lang}.png')

    cutoff = np.quantile(entropies, 0.9)
    d = d.loc[:, entropies > cutoff]
    print('Columns selected')

    if table_path is not None:
        d.to_csv(table_path)
    return d
def test_search_key_not_in_trie(self):
    """Searching for a key that was never inserted returns ``None``."""
    trie = Trie()
    entries = [
        ('their', 1),
        ('there', 2),
        ('answer', 3),
        ('any', 4),
        ('bye', 5),
    ]
    for key, value in entries:
        trie.insert_key(key, value)
    self.assertIsNone(trie.search_key('hello'))
class Dictionary(object):
    """Word list loaded from a file and indexed in a trie."""

    def __init__(self, dictionary_path="./dictionary.txt"):
        """Read one word per line from ``dictionary_path`` and build the trie."""
        super(Dictionary, self).__init__()
        with open(dictionary_path) as handle:
            self.words = [line.strip() for line in handle]
        self.trie = Trie(self.words)

    def is_prefix(self, prefix):
        """Return the trie's ``in_trie`` answer for ``prefix``."""
        return self.trie.in_trie(prefix)

    def is_valid_word(self, prefix):
        """Return the trie's ``is_valid_word`` answer for ``prefix``."""
        return self.trie.is_valid_word(prefix)
def main():
    """Smoke-test ``Trie`` word insertion, lookup and deletion.

    After two words are deleted, the words that cannot be found must be
    exactly the words whose deletion fails.
    """
    words = [
        'shocking', 'jeans', 'groan', 'employ', 'milky', 'supply', 'silk',
        'lean', 'brawny', 'peace', 'destruction', 'notice', 'apple', 'app',
        'apps', 'self', 'loops', 'error', 'dynamic',
    ]
    # Initialising the trie with words uses Trie.add_word.
    trie = Trie(words)

    # Testing Trie.delete_word.
    trie.delete_word('shocking')
    trie.delete_word('apps')

    # Testing Trie.add_word & Trie.find_word.
    words_2 = ['sunny', 'abc', 'zorro'] + words + ['eve', 'zebra']
    excluded_words = [w for w in words_2 if not trie.find_word(w)]
    print(excluded_words)

    # Testing Trie.delete_word again: deleting mutates the trie, so this
    # stays an explicit loop.
    false_deletion = []
    for w in words_2:
        if not trie.delete_word(w):
            false_deletion.append(w)
    print(false_deletion)

    # Check if deletion is successful.
    assert excluded_words == false_deletion, 'FAIL/CORRUPT DELETION in TRIE.'
    return 0
def replaceWords(self, dict, sentence):
    """Replace each word of ``sentence`` by the trie's answer for it.

    All entries of ``dict`` are inserted into a trie. A word is replaced
    by ``get_first_word_in_item(word)`` when that result is truthy and is
    kept unchanged otherwise. The words are re-joined with single spaces.
    """
    t = Trie()
    words = sentence.split()
    for key in dict:
        t.insert(key)
    return " ".join(t.get_first_word_in_item(word) or word for word in words)
def load_trie(loc):
    """Rebuild a trie from a file written by ``save_trie``.

    The file at ``loc`` is expected to hold a dill-serialised dictionary;
    every key/value pair of it is inserted into a new trie, which is
    returned.
    """
    # NOTE(review): dill.load executes arbitrary code from the file, so
    # only load files from a trusted source.
    with open(loc, 'rb') as file:
        data_dict = dill.load(file)

    # Create the trie from the unpickled dictionary
    trie = Trie()
    for key, value in data_dict.items():
        insert(key, value, trie.root)
    trie.yield_strings(trie.root)
    return trie
def trie_test(self):
    """The trie iterates its items and finds the words below a prefix."""
    items = ["potato", "pots", "buckle"]
    trie = Trie()
    for item in items:
        trie.add(item)

    # The trie contains the same items that we put in.
    self.assertEqual(items, list(trie.next()))

    # Looking up "pot" returns potato and pots, and NOT buckle.
    containing_words = list(trie.find("pot").terminals("pot"))
    self.assertEqual(len(containing_words), 2)
    # Order does not matter.
    self.assertIn("potato", containing_words)
    self.assertIn("pots", containing_words)
    self.assertNotIn("buckle", containing_words)
def __init__(self, dict_map):
    """Index ``dict_map`` in a trie and its keys in a BK-tree.

    ``dict_map`` maps each word to its translation, e.g.
    ``dict_map['english'] = 'chinese'``.
    """
    self.dict_map = dict_map
    self.dict_trie = Trie(dict_map)
    self.similar_tree = BK_Tree()
    for entry in dict_map:
        self.similar_tree.insert(entry)
def main():
    """Fill a 4x4 Boggle board at random and solve it against a word list.

    Words are read from ``wordsList.csv``; only those consisting solely of
    the letters ``a``-``z`` are stored in the trie, keyed to their length.
    """
    from string import ascii_lowercase
    from random import choice
    import csv
    import re

    b = BoggleBoard()
    for i in range(4):
        for j in range(4):
            b.put((i, j), choice(ascii_lowercase))

    with open("wordsList.csv", 'r') as f:
        words = [word for row in csv.reader(f) for word in row]

    t = Trie()
    lowercase_only = re.compile(r"^[a-z]*$")
    for word in words:
        if lowercase_only.match(word):
            t.put(word, len(word))

    BoggleSolver(b, t).solve()
def findLongestWord(self): word = self.word trie = Trie() queue = deque() #insert key to tree and also mark all the prefix with tuple format for key in word: # from longest to shortest prefixes = trie.getAllPrefix(key) for pf in prefixes: queue.append((key, key[len(pf):])) trie.insert(key) # get the longest word form the provided dictionary longest_word = ['',''] flag = 2 # mark get the first two longest dic = {} # mark visited word while queue: key,suffix = queue.popleft() if key not in dic and suffix in trie: dic[key] = True if len(key) > len(longest_word[0]): longest_word[1] = longest_word[0] longest_word[0] = key elif len(key) > len(longest_word[1]): longest_word[1] = key else: prefixes = trie.getAllPrefix(suffix) for pf in prefixes: queue.append((key, suffix[len(pf):])) #print result print "longest_word 1 are ", longest_word[0], ', length is ',len(longest_word[0]) print "longest_word 2 are ", longest_word[1], ', length is ',len(longest_word[1]) print "total words can be combined by other words are", len(dic) return
def __init__(self):
    """Create an empty 15x15 board.

    Game properties come from ``load.load_game_properties()`` and the word
    dictionary from ``../textfiles/wwf.txt``. Every square starts as a
    ``Tile`` with the ``single`` multiplier and is listed in
    ``empty_coords``; the special multipliers are applied afterwards.
    """
    self.multipliers, _letters, self.letter_values = load.load_game_properties()
    self.turn_num = 0
    self.word_dict = Trie("../textfiles/wwf.txt")
    self.BOARD_SIZE = 15
    self.tiles = []
    self.empty_coords = []
    for i in xrange(self.BOARD_SIZE):
        row = []
        self.tiles.append(row)
        for j in xrange(self.BOARD_SIZE):
            row.append(Tile(self.multipliers["single"]))
            self.empty_coords.append((i, j))
    self.init_multipliers(self.multipliers)
def __init__(self, tk_master, trie_file_location="trie.json"):
    """Build the autocomplete window and load the trie from JSON.

    :param tk_master: the Tk root/parent the widgets are attached to.
    :param trie_file_location: JSON file the trie is loaded from.
    """
    self.tk_master = tk_master
    self.tk_master.minsize(width=350, height=250)
    self.trie_file_location = trie_file_location

    # Containers and menu bar
    self.input_frame = Tk.Frame(self.tk_master)
    self.button_frame = Tk.Frame(self.tk_master)
    self.menu = Tk.Menu(tk_master)
    self.tk_master.config(menu=self.menu)
    self.input_frame.grid(row=0, column=0, sticky="n")
    self.button_frame.grid(row=0, column=1)
    self.menu.add_command(label="Save to file", command=self.save_trie)

    # Text field: autocompletion runs on every key release
    self.autocomplete_field = Tk.Text(self.input_frame, width=20)
    self.autocomplete_field.grid(row=0, column=0)
    self.autocomplete_field.bind("<KeyRelease>", self.autocomplete_last_word)

    # The button is created here but not placed by this method.
    self.button = Tk.Button(
        self.input_frame,
        text="Add words",
        command=self.add_word,
    )

    self.trie = Trie.from_JSON_file(trie_file_location)
class Lexer:
    """Character-driven DFA lexer that reads tokens from ``input3.txt``.

    States 0-7 are working states, 8 is the "put back" state (the
    character that ended a token is re-read as the start of the next one)
    and 9 is the error state. Accepting states map to tokens as follows:
    1 and 2 -> ``ID``, 3 -> ``INT``, 5 -> ``STRING``, 7 -> ``LPAR`` /
    ``RPAR`` / ``SEMICOLON``.
    """

    # Largest INT literal accepted; integer literals longer than five
    # digits are always rejected.
    MAX_INT = 65534
    MAX_INT_DIGITS = 5

    def __init__(self):
        """Open the input file and set up the DFA's five elements."""
        self.file = open("input3.txt")
        self.Trie = Trie()
        self.putback_bool, self.putback_val = False, ""
        self.current_state, self.current_word = 0, ""
        self.previous_word = ""
        self.previous_state = 0
        # 8 is putback and 9 is error
        self.states = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
        self.starting_state = {0}
        self.accepting_states = {1, 2, 3, 5, 7}
        self.delta = self._build_delta()

    @staticmethod
    def _build_delta():
        """Return the transition table mapping ``(state, char)`` to a state."""
        uppercase = set("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
        lowercase = set("abcdefghijklmnopqrstuvwxyz")
        whitespace = {'\t', '\n', ' '}
        numbers = set("0123456789")
        invalids = {'?', '-', '_', '+', '|', ',', '.'}

        delta = dict()
        for letter in uppercase:
            delta[(0, letter)] = 1
            delta[(4, letter)] = 4
            delta[(6, letter)] = 4
            # Move to putback/error state i.e. State 8/9
            delta[(3, letter)] = 8
            for i in [1, 2, 7]:
                delta[(i, letter)] = 9
        for letter in lowercase:
            delta[(0, letter)] = 2
            delta[(1, letter)] = 1
            delta[(2, letter)] = 2
            delta[(4, letter)] = 4
            delta[(6, letter)] = 4
            # Move to putback/error state i.e. State 8/9
            delta[(3, letter)] = 8  # e.g. 0a check
            delta[(7, letter)] = 9  # e.g. ;b error
        for number in numbers:
            delta[(0, number)] = 3
            delta[(3, number)] = 3
            delta[(6, number)] = 4
            # Move to putback/error state i.e. State 8/9
            for i in [1, 2]:
                delta[(i, number)] = 8
            delta[(7, number)] = 9  # e.g. ;0 error
        for space in whitespace:
            delta[(0, space)] = 0
            for i in [4, 6]:
                delta[(i, space)] = 4
        for character in invalids:
            for i in [0, 4, 5]:
                delta[(i, character)] = 9
            for i in [1, 2, 3, 6, 7]:
                delta[(i, character)] = 8
        delta[(0, '"')], delta[(6, '~')], delta[(6, '"')] = 4, 4, 4
        delta[(4, '"')] = 5
        delta[(4, '~')] = 6
        delta[(0, ')')], delta[(0, '(')], delta[(0, ';')] = 7, 7, 7
        return delta

    def transition_state(self, input):
        """Advance ``current_state`` on ``input``.

        A missing transition leads to the error state 9. While in state 3
        the number read so far must still pass ``check_max_int``; if it
        does not, the error state is entered as well.
        """
        key = (self.current_state, input)
        if key not in self.delta:
            self.current_state = 9
            return
        self.previous_state = self.current_state
        if self.current_state == 3 and not self.check_max_int():
            self.current_state = 9
        else:
            self.current_state = self.delta[key]

    def run_char(self, input):
        """Feed one character; return a token kind when a token is complete.

        Returns ``None`` while the current token is still being read.
        """
        if input in ('\t', '\n', ' ', '') and self.current_state != 4:
            # White space outside a string ends the current token.
            if self.current_state == 3 and not self.check_max_int():
                self.resetTrackedVariables()
                return "ERROR"
            identifier = self.checkIdentifier(self.current_state)
            if identifier != "ERROR":
                self.Trie.proccessWord(self.current_word)
            self.resetTrackedVariables()
            return identifier

        if not self.putback_bool:
            # If putback is false then change state and check state
            self.transition_state(input)
        if self.current_state != 8:
            # Not in putback state: add to the current word. '~' itself is
            # only kept when it moved us into state 6.
            if input != "~" or self.current_state == 6:
                self.current_word = self.current_word + input
            return None

        # Putback state: remember the character, emit the word read so far
        # and reset.
        self.set_putback(input)
        identifier = self.checkIdentifier(self.previous_state)
        self.Trie.proccessWord(self.current_word)
        self.resetTrackedVariables()
        return identifier

    def driver(self):
        """Return the next token as a ``<KIND,...>`` string, or ``None`` at EOF.

        The file is kept open between calls so the method can be called
        once per token; it is closed when the end of the input is reached.
        """
        source = self.file
        if source.closed:
            return None
        while True:
            if self.putback_bool:
                identifier = self.run_char(self.putback())
            else:
                c = source.read(1)
                if not c:
                    source.close()
                    return None
                identifier = self.run_char(c)

            if identifier is None:
                continue
            if identifier == 'ID':
                return '<' + identifier + ',' + str(self.Trie.checkWordExists(self.previous_word)) + '>'
            elif identifier == 'STRING':
                word = list(self.previous_word)
                return '<' + identifier + ',' + str(word) + '>'
            elif identifier == 'INT':
                return '<' + identifier + ',' + self.previous_word + '>'
            elif identifier in ('LPAR', 'RPAR', 'SEMICOLON'):
                return '<' + identifier + ', 0 >'
            elif identifier == 'ERROR':
                return '<' + identifier + '>'

    def in_accepting_state(self, state):
        """Return True when ``state`` is one of the accepting states."""
        return state in self.accepting_states

    def resetTrackedVariables(self):
        """Return to state 0, moving the current word to ``previous_word``."""
        self.current_state = 0
        self.previous_word = self.current_word
        self.current_word = ""

    def putback(self):
        """Clear the putback flag and return the character that was put back."""
        self.putback_bool = False
        return self.putback_val

    def set_putback(self, character):
        """Remember ``character`` so the driver feeds it again next."""
        self.putback_bool = True
        self.putback_val = character

    def checkIdentifier(self, state):
        """Map ``state`` to a token kind.

        Returns ``"ERROR"`` for a non-accepting state, and ``None`` for
        state 7 when the current word is none of ``(``, ``)`` and ``;``.
        """
        if not self.in_accepting_state(state):
            return "ERROR"
        if state == 1 or state == 2:
            return "ID"
        elif state == 3:
            return "INT"
        elif state == 5:
            return "STRING"
        elif state == 7:
            if self.current_word == '(':
                return 'LPAR'
            elif self.current_word == ')':
                return 'RPAR'
            elif self.current_word == ';':
                return 'SEMICOLON'

    def check_max_int(self):
        """Return True while ``current_word`` does not exceed ``MAX_INT``."""
        word = self.current_word
        if len(word) > self.MAX_INT_DIGITS:
            return False
        if len(word) < self.MAX_INT_DIGITS:
            return True
        return int(word) <= self.MAX_INT
def __init__(self):
    """Start with an empty trie as the lexicon."""
    self.lexicon = Trie()
class Vocabulary:
    """A set of words stored in a trie, with word-ladder search on top."""

    def __init__(self):
        """Start with an empty lexicon."""
        self.lexicon = Trie()

    def next(self):
        """Return the lexicon's ``next()`` result."""
        return self.lexicon.next()

    def __contains__(self, item):
        """Return True when the lexicon's ``find`` locates ``item``."""
        return self.lexicon.find(item) is not None

    def fetch(self, path):
        """Retrieves a set of words from the given file path.

        Every whitespace-separated token of every line is added as a word.

        :param path: The path to the vocabulary file.
        """
        with open(path, 'r') as document:
            for line in document:
                for word in line.split():
                    self.lexicon.add(word)

    def word_ladder(self, origin, destination):
        """Constructs a word ladder between the given words using the fetched vocabulary.

        A word ladder is a sequence of words, from origin to destination,
        where each intermediary word changes exactly one letter in the
        previous word. All intermediate words in the ladder must be real
        words.

        A tree stores the candidate paths. It is rooted at the destination
        and traversed breadth first, adding each word's legal one-character
        substitutions as children. Traversal ends when a path reaches the
        origin, and that node's ancestry is returned. Breadth-first order
        means the first path found is a shortest one; starting from the
        destination means the ancestry comes out in origin-to-destination
        order.

        :param origin: The starting word to construct a word ladder from.
        :param destination: The word that the ladder traverses to.
        :return: A sequence of words that constitutes a word ladder, or an
            empty list when no path was found.
        """
        paths = Tree()  # tree stores all possible paths
        paths.add_root(destination)
        # No branch needs to revisit a word another branch has been to.
        visited = set()
        for node in paths.breadth_first():
            if node.data == origin:
                # The ladder is complete: read it off this node's ancestors.
                return list(node.ancestor_data())
            for word in self.similar(node.data):
                if word not in visited:
                    node.add(word)
                    visited.add(word)
        return []  # no path was found

    def similar(self, word):
        """Yield words that differ from ``word`` by one substituted character.

        Nothing is yielded when ``word`` is not in the vocabulary.

        :param word: A word to find similar words to.
        """
        if word not in self:
            # A plain return ends the generator; raising StopIteration
            # here would surface as RuntimeError (PEP 479).
            return
        last = len(word) - 1
        for switch_position in range(len(word)):
            head = word[:switch_position]
            tail = word[switch_position + 1:]
            walker = self.lexicon.find(head)
            # Each child is a possible character substitution. A valid
            # child is one that contains the remaining original characters.
            for key, child in walker.children.items():
                # Don't offer the original character as a substitution.
                if word[switch_position] == key:
                    continue
                if switch_position == last:
                    # On the last letter every child is accepted.
                    yield head + key
                elif tail in child:
                    yield head + key + tail
#! /usr/bin/env python # Writer: wuhanghao # Date: 2016.2.1 from Trie import Trie with open(r'.\text.txt') as f: txt = f.read().decode('gb2312') pattern = Trie() rst = isMatched, lastPos, leng = pattern.match(txt) print rst if isMatched: print 'pattern "%s" found' % txt[lastPos-leng:lastPos] else: print 'pattern notfound'
''' You're given a dictionary of strings, and a key. Check if the key is composed of an arbitrary number of concatenations of strings from the dictionary. For example: dictionary: "world", "hello", "super", "hell" key: "helloworld" --> return true key: "superman" --> return false key: "hellohello" --> return true ''' from Trie import Trie words = ["world", "hello", "super", "hell" ] trie = Trie() for word in words: trie.insert(word, 1) def search(root, key, new_start = False): if root == None: return False if new_start: if not root.children.get(key[0], None): return False if(len(key) == 0): if root.data == 1: return True return False #Since we still have characters left, we search for the child node using the next
def setUp(self):
    """Create one empty trie and one holding ``bob`` -> 2 and ``apple`` -> 3."""
    self.empty = Trie()
    self.t = Trie()
    for word, value in (('bob', 2), ('apple', 3)):
        self.t.add(word, value)
def setUp(self):
    """Create a word reader for the Tom Sawyer text and a trie built on it."""
    reader = WordReader()
    # test addFileNames
    reader.addFileNames(["../../Material/The Adventures of Tom Sawyer by Mark Twain.txt"])
    self.lukija = reader
    self.trie = Trie(reader)
class TestTrie(unittest.TestCase):
    """Tests for a key/value ``Trie``: add, remove, update and lookup."""

    def setUp(self):
        """Create one empty trie and one holding ``bob`` -> 2 and ``apple`` -> 3."""
        self.empty = Trie()
        self.t = Trie()
        self.t.add('bob', 2)
        self.t.add('apple', 3)

    def testEmpty(self):
        """An empty trie reports ``'Empty'`` when traversed."""
        self.assertEqual(self.empty.traverseWords(), 'Empty')
        print('\ntestEmpty PASSED')

    def testInsert(self):
        """Words are listed by ``traverseWords`` in insertion order."""
        self.empty.add('bob', 2)
        self.assertEqual(self.empty.traverseWords(), 'bob')
        self.empty.add('apple', 3)
        self.assertEqual(self.empty.traverseWords(), 'bob apple')
        print('\ntestInsert PASSED')

    def testIsMember(self):
        """Stored words are members; prefixes, extensions and strangers are not."""
        self.assertTrue(self.t.isMember('bob') and self.t.isMember('apple'))
        self.assertFalse(
            self.t.isMember('bo') or self.t.isMember('bobo')
            or self.t.isMember('ap') or self.t.isMember('dave')
        )
        print('\ntestIsMember PASSED')

    def testCommonPrefix(self):
        """Words sharing a prefix can be added and removed independently."""
        trie = self.t

        trie.add('at', 5)
        self.assertEqual(trie.traverseWords(), 'bob apple at')
        self.assertEqual(trie.getValue('at'), 5)
        self.assertTrue(trie.isMember('at'))

        trie.add('ate', 7)
        self.assertEqual(trie.getValue('ate'), 7)
        self.assertTrue(trie.isMember('ate'))
        self.assertEqual(trie.getValue('at'), 5)
        self.assertTrue(trie.isMember('at'))
        self.assertEqual(trie.traverseWords(), 'bob apple at ate')

        # Removing the shorter word must leave the longer one intact.
        trie.remove('at')
        self.assertEqual(trie.traverseWords(), 'bob apple ate')
        self.assertFalse(trie.isMember('at'))
        self.assertEqual(trie.getValue('at'), None)
        self.assertTrue(trie.isMember('ate'))
        self.assertEqual(trie.getValue('ate'), 7)

        trie.add('at', 6)
        self.assertEqual(trie.getValue('at'), 6)
        self.assertTrue(trie.isMember('at'))
        self.assertEqual(trie.traverseWords(), 'bob apple at ate')

        # Removing the longer word must leave the shorter one intact.
        trie.remove('ate')
        self.assertEqual(trie.traverseWords(), 'bob apple at')
        self.assertTrue(trie.isMember('at'))
        self.assertEqual(trie.getValue('at'), 6)
        self.assertFalse(trie.isMember('ate'))
        self.assertEqual(trie.getValue('ate'), None)
        print('\ntestCommonPrefix PASSED')

    def testRemove(self):
        """Removing words one by one eventually leaves the trie empty."""
        trie = self.t
        trie.add('add', 5)
        self.assertTrue(trie.isMember('add'))
        self.assertEqual(trie.traverseWords(), 'bob apple add')

        self.assertTrue(trie.remove('apple'))
        self.assertEqual(trie.traverseWords(), 'bob add')

        self.assertTrue(trie.remove('add'))
        self.assertEqual(trie.traverseWords(), 'bob')

        self.assertTrue(trie.remove('bob'))
        self.assertEqual(trie.traverseWords(), 'Empty')
        print('\ntestRemove PASSED')

    def testUpdateValue(self):
        """``updateValue`` succeeds on stored words and on the prefix ``app``, not on ``dave``."""
        trie = self.t
        self.assertTrue(trie.updateValue('bob', 10))
        self.assertEqual(trie.getValue('bob'), 10)

        self.assertTrue(trie.updateValue('apple', 12))
        self.assertEqual(trie.getValue('apple'), 12)

        self.assertTrue(trie.updateValue('app', 1))
        self.assertEqual(trie.getValue('app'), 1)

        self.assertFalse(trie.updateValue('dave', 12))
        self.assertEqual(trie.getValue('dave'), None)
        print('\ntestUpdateValue PASSED')

    def testGetValue(self):
        """``getValue`` returns the stored value, or ``None`` for unknown keys."""
        trie = self.t
        self.assertEqual(trie.getValue('bob'), 2)
        self.assertEqual(trie.getValue('apple'), 3)
        self.assertEqual(trie.getValue('bo'), None)
        self.assertEqual(trie.getValue('dave'), None)
        print('\ntestGetValue PASSED')
class Board:
    """Square board of letter tiles with score multipliers and a word dictionary.

    The board keeps a grid of ``Tile`` objects, the list of still-empty
    coordinates, the current turn number and the ``Trie`` used to validate
    candidate words. The code is written to run unchanged on Python 2 and 3.
    """

    def _set_mirrored_multiplier(self, i, j, value):
        """Set `value` on tile (i, j) and on its three mirror-image tiles."""
        last = self.BOARD_SIZE - 1
        for col in (j, last - j):
            for row in (i, last - i):
                self.tiles[row][col].set_multiplier(value)

    def init_multipliers(self, multipliers):
        """Place the special multipliers on the board.

        Only positions of one board octant are listed; each is mirrored into
        all four quadrants and then again with row/column swapped. The centre
        tile receives ``multipliers["start"]``.

        :param multipliers: mapping from multiplier name ("double letter",
            "double word", "triple letter", "triple word", "start") to the
            value passed to ``Tile.set_multiplier``.
        """
        mult_loc = {
            "double letter": [(1, 2), (2, 4), (4, 6)],
            "double word": [(1, 5), (3, 7)],
            "triple letter": [(0, 6), (3, 3), (6, 0)],
            "triple word": [(0, 3), (3, 0)],
        }
        for mult in mult_loc:
            for i, j in mult_loc[mult]:
                self._set_mirrored_multiplier(i, j, multipliers[mult])
            # Same positions with row and column swapped.
            for j, i in mult_loc[mult]:
                self._set_mirrored_multiplier(i, j, multipliers[mult])
        # Floor division keeps the index an int on Python 3 as well.
        centre = self.BOARD_SIZE // 2
        self.tiles[centre][centre].set_multiplier(multipliers["start"])

    def __init__(self):
        """Load game properties and the dictionary, then build an empty 15x15 board."""
        self.multipliers, _letters, self.letter_values = load.load_game_properties()
        self.turn_num = 0
        self.word_dict = Trie("../textfiles/wwf.txt")
        self.BOARD_SIZE = 15
        self.tiles = []
        self.empty_coords = []
        for i in range(self.BOARD_SIZE):
            self.tiles.append([])
            for j in range(self.BOARD_SIZE):
                self.tiles[i].append(Tile(self.multipliers["single"]))
                self.empty_coords.append((i, j))
        self.init_multipliers(self.multipliers)

    def within_bounds(self, coords):
        """Return True when `coords` (row, col) lies on the board."""
        return (0 <= coords[0] < self.BOARD_SIZE
                and 0 <= coords[1] < self.BOARD_SIZE)

    def print_board(self):
        """Write the board to stdout.

        A header row of column indices is followed by one row per board line,
        prefixed with the row index. Each cell is right-justified to width 3
        and shows the tile's letter, or its multiplier when no letter is
        placed. The spacing (one space between cells, every row terminated by
        " \\n\\n") mirrors what the former Python 2 print statements emitted.
        """
        import sys  # local import: only this method needs it

        write = sys.stdout.write
        for i in range(self.BOARD_SIZE):
            if i == 0:
                header = "".rjust(3)
                for j in range(self.BOARD_SIZE):
                    header += " " + str(j).rjust(3)
                write(header + " \n\n")
            row = str(i).rjust(3)
            for j in range(self.BOARD_SIZE):
                tile = self.tiles[i][j]
                letter = tile.get_letter()
                # NOTE(review): assumes get_multiplier() returns a string
                # (it must support .rjust) - confirm against Tile.
                cell = tile.get_multiplier() if letter is None else letter
                row += " " + cell.rjust(3)
            write(row + " \n\n")

    def get_next_in_direction(self, coord, direction, orient):
        """Return the coordinate one step from `coord` along `direction`.

        :param orient: +1 to move with `direction`, -1 to move against it.
        """
        return (coord[0] + orient * direction[0],
                coord[1] + orient * direction[1])

    def _collect_run(self, coord, direction, orient, score):
        """Walk over consecutive placed tiles starting next to `coord`.

        Returns the letters met, in walking order, and `score` increased by
        the contribution of every tile met.
        """
        letters = []
        curr_coord = self.get_next_in_direction(coord, direction, orient)
        while self.within_bounds(curr_coord) and curr_coord not in self.empty_coords:
            tile = self.get_tile(curr_coord)
            letter = tile.get_letter()
            letters.append(letter)
            # NOTE(review): a tile only adds its letter value when
            # is_wild_card() is truthy; if wild cards are meant to score
            # zero this factor is inverted - confirm against Tile.
            score += self.letter_values[letter] * tile.is_wild_card()
            curr_coord = self.get_next_in_direction(curr_coord, direction, orient)
        return letters, score

    def compute_cross_checks(self):
        """Recompute, for every empty tile and both directions, which letters fit.

        For each empty coordinate the already-placed letters immediately
        before and after it along the direction are collected. A letter is
        allowed when prefix + letter + suffix is in the dictionary; when there
        are no neighbouring letters every letter is allowed. The result and
        the accumulated neighbour score are stored on the tile.
        """
        directions = [(0, 1), (1, 0)]
        # ascii_lowercase is always the 26 letters a-z; string.lowercase is
        # locale-dependent and could exceed the 26-bit array.
        alphabet = string.ascii_lowercase
        for coord in self.empty_coords:
            for direction in directions:
                before, score = self._collect_run(coord, direction, -1, 0)
                after, score = self._collect_run(coord, direction, 1, score)
                left_word = "".join(reversed(before))
                right_word = "".join(after)

                # A fresh array per tile/direction, so tiles never end up
                # sharing (and overwriting) one mutable buffer. Every bit is
                # assigned below.
                curr_array = bitarray(len(alphabet))
                if left_word != "" or right_word != "":
                    for i, letter in enumerate(alphabet):
                        cand_word = left_word + letter + right_word
                        curr_array[i] = self.word_dict.word_exists(cand_word)
                else:
                    curr_array.setall(True)
                tile = self.get_tile(coord)
                tile.fill_cross_check(direction, curr_array)
                tile.set_cross_check_score(direction, score)

    def place_letter(self, letter, coords):
        """Put `letter` on the tile at `coords` and mark that coordinate as used.

        Raises ValueError (from list.remove) if `coords` is not currently empty.
        """
        self.tiles[coords[0]][coords[1]].set_letter(letter)
        self.empty_coords.remove(coords)

    def get_adjacent_placed_tiles(self, coords):
        """Return the in-bounds orthogonal neighbours of `coords` that hold a letter."""
        adjacent_tiles = []
        for di, dj in ((-1, 0), (0, -1), (0, 1), (1, 0)):
            neighbour = (coords[0] + di, coords[1] + dj)
            if (self.within_bounds(neighbour)
                    and self.get_tile(neighbour).get_letter() is not None):
                adjacent_tiles.append(neighbour)
        return adjacent_tiles

    def get_turn(self):
        """Return the current turn number."""
        return self.turn_num

    def advance_turn(self):
        """Increase the turn number by one."""
        self.turn_num += 1

    def get_start_pos(self):
        """Return the (row, col) of the centre tile as integers."""
        return (self.BOARD_SIZE // 2, self.BOARD_SIZE // 2)

    def get_dict(self):
        """Return the word dictionary (Trie)."""
        return self.word_dict

    def get_tile(self, coords):
        """Return the Tile at `coords` (row, col)."""
        return self.tiles[coords[0]][coords[1]]

    def get_empty_coords(self):
        """Return the list of coordinates without a placed letter."""
        return self.empty_coords

    def get_letter_value(self, letter):
        """Return the score value of `letter`; raises KeyError for unknown letters."""
        return self.letter_values[letter]
class pinyin(object): def __init__(self, pinyins): self.pinyins = pinyins # 读入所有有效拼音 self.tree = Trie() f = open('pinyin/pinyin_list.txt') # f = open('pinyin_list.txt') for line in f: self.tree.insert(line.split()[0]) f.close() def split(self): ''' 分割函数 @param pinyin: 拼音串 str @return: 分割后的拼音列表 list ''' # 可作为拼音开头的字母 pinyin_initials = ['a', 'b', 'e', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h', 'j', 'q', 'x', 'r', 'z', 'c', 's', 'y', 'w'] # pinyin_initials = self.tree.root.children iuv = ['i','u','v'] grn = ['g','r','n'] input = '' result = [] for i in range(len(self.pinyins)): c = self.pinyins[i] # 读入字符 c input += c # c是 i|u|v,并且是拼音串的首字母 if c in iuv and len(input)==1: return False,None # 当前拼音有效或者是有效拼音的一部分 if self.tree.find_initial_with(input): continue # c是声母 if c in pinyin_initials: # 前面的拼音为有效拼音 if self.tree.find_initial_with(input[:-1]): # 在c前断开 result.append(input[:-1]) input = input[-1:] continue else: return False,None # 倒数第二个字母为 g|r|n elif input[-2:-1] in grn: # 在 g|r|n 前断开有效 if self.tree.find_initial_with(input[:-2]): # 在 g|r|n 前断开 result.append(input[:-2]) input = input[-2:] continue # 在 g|r|n 后断开有效 elif self.tree.find_initial_with(input[:-1]): # 在 g|r|n 后断开 result.append(input[:-1]) input = input[-1:] continue else: # 单独断开 result.append(input) input = '' result.append(input) return True,result
def __init__(self):
    """Initialise the DFA: input file, bookkeeping, state sets and delta.

    States are numbered 0-9. State 8 is the "putback" state and state 9 is
    the error state.
    """
    self.file = open("input3.txt")
    self.Trie = Trie()

    # Scanner bookkeeping.
    self.putback_bool = False
    self.putback_val = ""
    self.current_state = 0
    self.current_word = ""
    self.previous_word = ""
    self.previous_state = 0

    # 8 is putback and 9 is error.
    self.states = set(range(10))
    self.starting_state = {0}
    self.accepting_states = {1, 2, 3, 5, 7}

    # Input alphabet, grouped by character class.
    upper_letters = set("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
    lower_letters = set("abcdefghijklmnopqrstuvwxyz")
    blanks = {'\t', '\n', ' '}
    digits = set("0123456789")
    invalids = {'?', '-', '_', '+', '|', ',', '.'}

    # For each character class: (from_state, to_state) pairs.
    # Targets 8/9 are the putback/error states.
    class_moves = (
        (upper_letters,
         ((0, 1), (4, 4), (6, 4), (3, 8), (1, 9), (2, 9), (7, 9))),
        (lower_letters,
         ((0, 2), (1, 1), (2, 2), (4, 4), (6, 4),
          (3, 8),      # e.g. 0a check
          (7, 9))),    # e.g. ;b error
        (digits,
         ((0, 3), (3, 3), (6, 4), (1, 8), (2, 8),
          (7, 9))),    # e.g. ;0 error
        (blanks,
         ((0, 0), (4, 4), (6, 4))),
        (invalids,
         ((0, 9), (4, 9), (5, 9),
          (1, 8), (2, 8), (3, 8), (6, 8), (7, 8))),
    )

    transitions = {}
    for symbols, moves in class_moves:
        for symbol in symbols:
            for source, target in moves:
                transitions[(source, symbol)] = target

    # Single-character special cases.
    specials = (
        ((0, '"'), 4),
        ((6, '~'), 4),
        ((6, '"'), 4),
        ((4, '"'), 5),
        ((4, '~'), 6),
        ((0, ')'), 7),
        ((0, '('), 7),
        ((0, ';'), 7),
    )
    for key, target in specials:
        transitions[key] = target

    self.delta = transitions
class PyTrieTestCases(unittest.TestCase):
    """Tests for a Trie that is fed by a WordReader."""

    def setUp(self):
        """Create a reader pointed at the Tom Sawyer text and a trie on top of it."""
        self.lukija = WordReader()
        # test addFileNames
        source_files = ["../../Material/The Adventures of Tom Sawyer by Mark Twain.txt"]
        self.lukija.addFileNames(source_files)
        self.trie = Trie(self.lukija)

    def tearDown(self):
        """Clear and drop the reader and the trie."""
        self.lukija.clear('all')
        self.trie.clear()
        self.lukija = None
        self.trie = None

    def testSimpleAddFind(self):
        """ Add some objects to Trie and see if you can find them """
        # Add words to Trie
        for entry in WordsToAdd:
            self.trie.add(entry[0], entry[1:])

        found = []
        for entry in WordsToAdd:
            text = entry[0]
            # Get the position of each word
            positions, _, _ = self.trie.find(text)
            first_hit = positions[0]
            # Rebuild the entry in the same shape as the input list.
            found.append((text, first_hit[0], first_hit[1]))

        self.assertEqual(found, WordsToAdd,
                         'Trie: Did not find all words that were supposed to add')

    def testMultiWordFind(self):
        """Words added several times report every one of their positions."""
        # Add words to Trie
        for entry in MultiWordAdd:
            self.trie.add(entry[0], entry[1:])

        positions, _, _ = self.trie.find('a')
        self.assertEqual(positions, MultiWordFindA,
                         'Trie: Error finding multiple instances of a word')
        positions, _, _ = self.trie.find('b')
        self.assertEqual(positions, MultiWordFindB,
                         'Trie: Error finding multiple instances of a word')

    def testWordCounter(self):
        """ Tests that both the reader and the tree can count the words """
        reader = self.lukija
        reader.clear('all')
        reader.addFileNames(["../../Material/50words_in_UTF-8.txt"])
        self.assertEqual(reader.wordcount, 0,
                         'Trie: WordReader clearing failed')

        reader.readWords()
        self.assertEqual(reader.wordcount, 50,
                         'Trie: WordReader failed in reading words')

        self.trie.clear()
        self.trie.addFromReader()
        self.assertEqual(self.trie.wordCount, 50,
                         'Trie: word counting failed')
def __init__(self, dictionary_path="./dictionary.txt"):
    """Read one word per line from `dictionary_path` and index them in a Trie.

    :param dictionary_path: path of the word list; surrounding whitespace
        (including the newline) is stripped from every line.
    """
    super(Dictionary, self).__init__()
    with open(dictionary_path) as handle:
        stripped = [entry.strip() for entry in handle]
    self.words = stripped
    self.trie = Trie(self.words)