def sum_n(L, k):
    """Report whether two elements of L at different positions add up to k.

    Each element is checked against the values stored earlier in the scan
    and only then stored itself, so an element is never paired with itself.

    Returns True at the first match, False when the scan ends without one.
    NOTE(review): relies on retrieve() giving a non-None result for keys
    stored through the one-argument insert() -- confirm against htc.
    """
    seen = htc.HashTableChain(len(L))
    for value in L:
        complement = k - value
        # A None result is treated as "complement not stored yet".
        if seen.retrieve(complement) is not None:
            return True
        seen.insert(value)
    return False
def remove_duplicates(L):
    """Return a new list with the elements of L, first occurrences only.

    The relative order of the kept elements matches their order in L.
    """
    table = htc.HashTableChain(len(L))
    kept = []
    for item in L:
        # insert() answering -1 is treated as "key was already stored".
        if table.insert(item, ' ') == -1:
            continue
        kept.append(item)
    return kept
def abstract_Stats(name, L, h):
    """Print word statistics for a text and for the hash table built from it.

    name -- label printed in the "Analysis of ..." line.
    L    -- list of words.
    h    -- table of stop words; a word is treated as a stop word when
            h.retrieve(word) equals the word itself.

    The non-stop words are counted in a fresh HashTableChain; its bucket and
    record totals, the values produced by the load_factor,
    empty_bucket_fraction and long_bucket_F helpers, and the most frequent
    word are printed. Returns None.
    """
    non_stop = [word for word in L if word != h.retrieve(word)]

    print("Total words: ", len(L))
    print("Total non-stop words: ", len(non_stop))
    print("Analysis of", name, "hash table")

    counts = htc.HashTableChain(len(non_stop))
    records = 0
    # Initialised up front so the final print cannot hit an unbound name
    # when every word was a stop word (or L was empty).
    common_word = None
    common_num = 0
    for word in non_stop:
        count = counts.retrieve(word)
        if count is None:
            count = 1
            counts.insert(word, count)
            records += 1
        else:
            count += 1
            counts.update(word, count)
        # Strict comparison: on ties the word that reached the count first wins.
        if count > common_num:
            common_num = count
            common_word = word

    print("Total buckets: ", len(counts.bucket))
    print("Total records: ", records)
    print("Load factor: ", load_factor(counts))
    print("Empy bucket fraction in table: ", empty_bucket_fraction(counts))
    long_bucket_F(counts)
    print("Most common word: ", common_word, "occurs -", common_num, "times")
def has_duplicates(L):  # O(n)
    """Return True if some value occurs more than once in L, else False.

    Stops at the first repeated value.
    """
    table = htc.HashTableChain(len(L))
    # insert() answering -1 is treated as "key was already stored".
    return any(table.insert(item, []) == -1 for item in L)
def different_words(L):
    """Count the elements of L whose insertion into a fresh table succeeds.

    An insertion is counted when insert() returns a positive value.
    """
    table = htc.HashTableChain(len(L))
    return sum(1 for word in L if table.insert(word) > 0)
def sum2HT(L, k):
    """Find positions i and j such that L[i] + L[j] == k.

    Returns (i, j), where i is the first index whose complement k - L[i]
    occurs in L and j is the first index holding that complement, or
    (None, None) when no such values exist.

    NOTE(review): as in the previous implementation, i and j coincide when
    L[i] is its own complement and no earlier equal value exists -- confirm
    whether callers expect two distinct positions.
    """
    # Map every value to the first position where it occurs, so each
    # complement lookup is a single table access instead of a list scan.
    first_index = htc.HashTableChain(len(L))
    for j, value in enumerate(L):
        if first_index.retrieve(value) is None:
            first_index.insert(value, j)

    for i, value in enumerate(L):
        j = first_index.retrieve(k - value)
        # Compare against None explicitly: position 0 is a valid answer.
        if j is not None:
            return i, j
    return None, None  # No such values whose sum equals k
def numVert(edgeLst):
    """Return the total number of distinct vertices in the graph.

    edgeLst is iterated as a sequence of edges, each edge itself being an
    iterable of vertices. A vertex is counted when its insertion into the
    table returns 1.
    """
    table = htc.HashTableChain(len(edgeLst) * 2)
    total = 0
    for edge in edgeLst:
        for vertex in edge:
            if table.insert(vertex, vertex) != 1:
                continue
            total += 1
    return total
def unique_items(L):
    """Return the distinct elements of L as a new, sorted list."""
    table = htc.HashTableChain(len(L))
    unique = []
    # Walk the list itself rather than range(len(table.bucket)), so the
    # result does not depend on the table having exactly len(L) buckets.
    for item in L:
        # insert() answering 1 is treated as "stored for the first time".
        if table.insert(item, item) == 1:
            unique.append(item)
    unique.sort()
    return unique
def find_sum_pair(S, k):
    """Return True if two elements of S at different positions sum to k.

    Each element is compared only against elements stored earlier in the
    scan. Returns False when no pair is found.
    """
    seen = htc.HashTableChain(len(S))
    for s in S:
        if seen.retrieve(k - s) is None:
            # Complement not stored yet: remember s and keep scanning.
            seen.insert(s, s)
            continue
        return True
    return False
def difference(L1, L2):
    """Return the elements of L1 that are not stored in a table built from L2.

    Order and repetitions of L1 are preserved in the result.
    NOTE(review): membership relies on retrieve() giving a non-None result
    for keys stored through the one-argument insert() -- confirm against htc.
    """
    if not L2:
        # Nothing to exclude; also avoids querying a table with zero buckets.
        return list(L1)
    excluded = htc.HashTableChain(len(L2))
    for item in L2:
        excluded.insert(item)
    return [item for item in L1 if excluded.retrieve(item) is None]
def build_index_table(L):
    """Build a table mapping each element of L to the list of its positions.

    Returns the HashTableChain; the data stored under a key is the list of
    indices, in ascending order, at which that key occurs in L.
    """
    table = htc.HashTableChain(len(L))
    for position, key in enumerate(L):
        positions = table.retrieve(key)
        if positions is None:
            # First time this key is seen.
            table.insert(key, [position])
            continue
        # Repeated key: store a new list extended with this position.
        table.update(key, positions + [position])
    return table
def reversed_pairs(L):
    """Return, sorted, every element of L whose reversal is stored in the table.

    All elements are stored first, so an element that reads the same in
    both directions matches itself.
    """
    table = htc.HashTableChain(len(L))
    for s in L:
        table.insert(s)
    matches = [s for s in L if table.retrieve(s[::-1]) is not None]
    matches.sort()
    return matches
def isUniqueHash(s):
    """Return True if no character occurs more than once in s, else False."""
    seen = htc.HashTableChain(len(s))
    for char in s:
        if seen.retrieve(char) is not None:
            # Character was stored on an earlier iteration: it repeats.
            return False
        seen.insert(char, 1)
    return True
def intersection(L1, L2):
    """Return the elements of L2 that are stored in a table built from L1.

    Order and repetitions of L2 are preserved in the result.
    NOTE(review): membership relies on retrieve() giving a non-None result
    for keys stored through the one-argument insert() -- confirm against htc.
    """
    if not L1:
        # Nothing can match; also avoids querying a table with zero buckets.
        return []
    members = htc.HashTableChain(len(L1))
    for item in L1:
        members.insert(item)
    return [item for item in L2 if members.retrieve(item) is not None]
def reversed_pairs(L):
    """Return, sorted, the stored reversals that also occur in L as written.

    The reversal of every element is stored as both key and data; each
    element of L is then looked up and the stored data collected on a hit.
    """
    table = htc.HashTableChain(len(L))
    for s in L:
        flipped = s[::-1]
        table.insert(flipped, flipped)

    found = []
    for s in L:
        hit = table.retrieve(s)
        if hit is None:
            continue
        found.append(hit)
    found.sort()
    return found
def build_index_table(L):
    """Build a table mapping each element of L to the list of its positions.

    Returns the HashTableChain. For a repeated key the list obtained from
    retrieve() is extended in place and written back with update().
    """
    table = htc.HashTableChain(len(L))
    for idx, key in enumerate(L):
        found = table.retrieve(key)
        if found is not None:
            found.append(idx)
            table.update(key, found)
        else:
            table.insert(key, [idx])
    return table
def repeats(S, c):
    """Return the length-c substrings of S that occur more than once.

    Each repeated substring is listed once, in the order in which its
    second occurrence is met while scanning S from left to right.
    c is expected to be a positive integer.
    """
    counts = htc.HashTableChain(len(S))
    repeat = []
    # Stop at the last full-length window; shorter tail slices are not
    # length-c substrings and can never be reported.
    for i in range(len(S) - c + 1):
        DNA = S[i:i + c]
        seen = counts.retrieve(DNA)
        if seen is None:
            counts.insert(DNA, 1)  # Sequence appears at least one time
        elif seen == 1:
            # Second sighting: report it once and mark it as reported, so
            # no scan of the result list is needed to avoid repetition.
            repeat.append(DNA)
            counts.update(DNA, 2)
    return repeat
def analyzeFile(wl):
    """Count the words of wl in a hash table, analyse it and print the top word.

    wl -- list of words.

    The table is handed to analyzeHT(), then the most common word and its
    number of occurrences are printed. On ties the word that reached the
    highest count first is reported. For an empty list the word is
    reported as None with 0 occurrences.
    """
    counts = htc.HashTableChain(len(wl))
    reps = 0
    repKey = None
    for w in wl:
        # insert() answering -1 is treated as "word already counted".
        if counts.insert(w, 1) == -1:
            seen = counts.retrieve(w) + 1
            counts.update(w, seen)
        else:
            seen = 1
        # First occurrences are compared too, so a text without repeated
        # words still reports a real word with a count of 1.
        if seen > reps:
            reps = seen
            repKey = w
    analyzeHT(counts)
    print('Most common word:', repKey, '- occurs', reps, 'times')
def missing_pair(L):
    """Return the key of the first record left after cancelling repeats.

    Scanning L, an element is stored when retrieve() reports None for it
    and deleted otherwise. Afterwards the buckets are walked in order and
    the key of the first record of the first non-empty bucket is returned,
    or None when every bucket is empty.
    NOTE(review): relies on retrieve() giving a non-None result for keys
    stored through the one-argument insert() -- confirm against htc.
    """
    table = htc.HashTableChain(len(L))
    for item in L:
        if table.retrieve(item) is None:
            table.insert(item)
        else:
            table.delete(item)
    return next(
        (bucket[0].key for bucket in table.bucket if len(bucket) > 0),
        None,
    )
def createHashTable(word_lists):
    """Build a table mapping each word to the indices of the lists containing it.

    word_lists is a sequence of word sequences. For every word the stored
    data is a list of indices into word_lists; an index is appended only
    when it differs from the last one recorded, so a word repeated inside
    one list is recorded once for that list. Returns the HashTableChain.
    """
    table = htc.HashTableChain(len(word_lists))
    for list_no, words in enumerate(word_lists):
        for word in words:
            recorded = table.retrieve(word)
            if recorded is None:
                table.insert(word, [list_no])
            elif recorded[-1] != list_no:
                # Skip duplicates within the same list.
                recorded.append(list_no)
                table.update(word, recorded)
    return table
def isPermutationHash(s1, s2):
    """Return True if s2 is a case-insensitive permutation of s1.

    Both strings are lowercased, then compared by character counts: every
    character must occur the same number of times in both.
    """
    # str.lower() returns a new string; the results must be kept.
    s1 = s1.lower()
    s2 = s2.lower()
    if len(s1) != len(s2):
        return False
    if not s1:
        return True  # two empty strings

    # Count every character of s1 ...
    remaining = htc.HashTableChain(len(s1))
    for char in s1:
        count = remaining.retrieve(char)
        if count is None:
            remaining.insert(char, 1)
        else:
            remaining.update(char, count + 1)

    # ... and use the counts up with the characters of s2. With equal
    # lengths, never running short means the counts match exactly.
    for char in s2:
        count = remaining.retrieve(char)
        if not count:  # absent (None) or already used up (0)
            return False
        remaining.update(char, count - 1)
    return True
def hash_Stats(L):
    """Store the elements of L in a hash table, print its statistics, return it.

    Each element is inserted as both key and data. A record is counted
    whenever retrieve() reports None for the element just before insertion.
    Printed: bucket and record totals plus the values produced by the
    load_factor, empty_bucket_fraction and long_bucket_F helpers.
    """
    table = htc.HashTableChain(len(L))
    records = 0
    for item in L:
        is_new = table.retrieve(item) is None
        table.insert(item, item)
        if is_new:
            records += 1

    print("Total buckets: ", len(table.bucket))
    print("Total records: ", records)
    print("Load factor: ", load_factor(table))
    print("Empy bucket fraction in table: ", empty_bucket_fraction(table))
    long_bucket_F(table)
    return table
def build_index_table(L):
    """Build a table mapping each element of L to the list of its positions.

    Returns the HashTableChain. A repeated key has the new position
    appended to its own list, which is then written back under that key
    with update().
    """
    table = htc.HashTableChain(len(L))
    for pos, key in enumerate(L):
        positions = table.retrieve(key)
        if positions is not None:
            positions.append(pos)
            table.update(key, positions)
            continue
        table.insert(key, [pos])
    return table
def most_repeated_word(L):
    """Return (word, count) for the most repeated word in L.

    Only words seen at least twice are considered; when no word repeats
    the result is ('', 0). On ties the word that reached the highest count
    first is returned.
    """
    counts = htc.HashTableChain(len(L))
    best_word, best_count = '', 0
    for word in L:
        seen = counts.retrieve(word)
        if seen is None:
            # First sighting: start the count, nothing to compare yet.
            counts.insert(word, 1)
            continue
        seen += 1
        counts.update(word, seen)
        if seen > best_count:
            best_word, best_count = word, seen
    return best_word, best_count
def repeats(S, c):
    """Return the length-c substrings of S that occur more than once.

    Every full-length window of S is counted in a hash table; a substring
    is added to the result, once, when it is met again.
    """
    counts = htc.HashTableChain(len(S))
    found = []
    for start in range(len(S) - c + 1):
        window = S[start:start + c]
        seen = counts.retrieve(window)
        if seen is None:
            counts.insert(window, 1)
            continue
        counts.update(window, seen + 1)
        # Avoid listing the same substring twice.
        if window not in found:
            found.append(window)
    return found
6, 3, 23, 16, 11, 25, 7, 17, 27, 30, 21, 14, 26, 8, 29, 22, 28, 5, 19, 24, 15, 1, 2, 4, 18, 13, 9, 20, 10, 12 ] for num in nums: T.insert(num) print('Question 2') T.draw() prune_leaves(T) T.draw() print('Question 3') L = [2, 4, 6, 1, 2, 3, 1, 12] H = htc.HashTableChain(len(L)) for i in L: H.insert(i) H.print_table() print(item_status(H, 5)) print(item_status(H, 6)) print(item_status(H, 4)) print(item_status(H, 12)) print('Question 4') H1 = min_heap.MinHeap() for i in [1, 3, 7, 8, 4, 13, 9, 10]: H1.insert(i) H1.draw()
if L[i] in L[i + 1:]: return True return False def has_duplicates_v2(L): # O(n log n) L.sort() for i in range(1, len(L)): if L[i] == L[i + 1:]: return True return False if __name__ == "__main__": h = htc.HashTableChain(9) players = [ 'Bellinger', 'Betts', 'Hernandez', 'Pederson', 'Pollock', 'Taylor' ] numbers = [35, 50, 14, 31, 11, 3] for i in range(len(players)): h.insert(numbers[i], players[i]) h.print_table() print(load_factor(h)) # 0.66666666666666 print(longest_bucket(h)) # 2
def create_hashtable(L):
    """Return a HashTableChain sized len(L) holding each element of L.

    Every element is inserted as both key and data.
    """
    table = htc.HashTableChain(len(L))
    for item in L:
        table.insert(item, item)
    return table
if h.retrieve(i[::-1]) != None: pairs.append(i) return sorted(pairs) if __name__ == "__main__": countries = [ 'Russia', 'Canada', 'USA', 'Brazil', 'Australia', 'China', 'Spain', 'France' ] capitals = [ 'Moscow', 'Ottawa', 'Washington', 'Brasilia', 'Canberra', 'Beijing', 'Madrid', 'Paris' ] h = htc.HashTableChain(len(countries)) for i in range(len(countries)): h.insert(countries[i], capitals[i]) h.print_table() ''' Table contents: bucket 0: [ [Australia, Canberra] ] bucket 1: [ [Spain, Madrid] ] bucket 2: [ ] bucket 3: [ [Russia, Moscow] [USA, Washington] [France, Paris] ] bucket 4: [ [Brazil, Brasilia] ] bucket 5: [ ] bucket 6: [ [Canada, Ottawa] ] bucket 7: [ [China, Beijing] ] '''
def invert_hash(h):
    """Return a new table in which each record of h has key and data swapped.

    The new table gets as many buckets as h. Records are visited bucket by
    bucket, in order, and inserted with the original data as key and the
    original key as data.
    """
    inverted = htc.HashTableChain(len(h.bucket))
    records = (record for bucket in h.bucket for record in bucket)
    for record in records:
        inverted.insert(record.data, record.key)
    return inverted