def __init__(self, objs=None, hash_type=HashType.QUADRATIC, size=10, c1=1, c2=1):
    """Build an empty hash table and optionally load it with initial objects.

    Args:
        objs: Optional iterable of objects to insert. Objects whose string
            form is empty are skipped. If ``HashTable.to_value`` is negative
            for any object, a message is printed and nothing is inserted;
            the table is still left fully initialized and empty.
        hash_type: ``HashType`` member selecting the collision strategy.
            ``HashType.CHAIN`` stores a list in every slot; every other
            value is treated as open addressing.
        size: Requested number of slots. For ``HashType.QUADRATIC`` it is
            rounded UP to the next power of two.
        c1: Stored on the instance (presumably a probing coefficient --
            confirm against the hash function).
        c2: Stored on the instance (presumably a probing coefficient --
            confirm against the hash function).

    Raises:
        ValueError: If ``hash_type`` is ``HashType.QUADRATIC`` and ``size``
            is not positive.
    """
    from math import ceil

    if hash_type is HashType.QUADRATIC:
        if size <= 0:
            raise ValueError("size must be positive for a quadratic-probing table")
        # Exact integer arithmetic: ceil(log(size) / log(2)) can land just
        # above an integer for an exact power of two, doubling the table.
        self.size = 1 << (int(ceil(size)) - 1).bit_length()
    else:
        self.size = size
    self.item_count = 0

    # Set the configuration before anything else so the instance is
    # consistent even when the initial objects are rejected below.
    self.hash_type = hash_type
    self.c1 = c1
    self.c2 = c2

    # Materialize once so a one-shot iterable survives both the validation
    # pass and the insertion pass.
    items = [] if objs is None else list(objs)
    rejected = any(HashTable.to_value(obj) < 0 for obj in items)

    if hash_type is HashType.CHAIN:
        # Chained tables hold an independent list at each index.
        self.table = [[] for _ in range(self.size)]
    else:
        # Index is a collection of index numbers that contain items; it is
        # only needed for open addressed hash tables.
        self.index = AVL()
        # Open addressing starts with every slot marked as never used.
        self.table = [EmptyType.EMPTY_SINCE_START] * self.size

    if rejected:
        print("HashTable only takes positive values.")
        return

    for obj in items:
        if len(str(obj)) > 0:
            self.insert(obj)
def plot_times(filename="English.txt", start=500, stop=5500, step=500):
    """Plot build and search times for a LinkedList, a BST, and an AVL tree.

    For every n in start, start + step, ..., up to and including stop, take
    the first n words returned by create_word_list(filename), time how long
    it takes to load them into each structure, then pick 5 random words from
    that data set and record the average time to find one word in each
    structure.

    The figure has two subplots: build time against n, and average search
    time against n.

    Inputs:
        filename (str): the file to use in creating the data sets.
        start (int): the lower bound on the sample interval.
        stop (int): the upper bound on the sample interval (inclusive).
        step (int): the space between points in the sample interval.

    Returns:
        Show the plot, but do not return any values.
    """
    searches_per_size = 5

    def elapsed(action, items):
        """Return the seconds taken to call action(item) for every item."""
        began = time()
        for item in items:
            action(item)
        return time() - began

    def average_search(action, targets):
        """Return the mean seconds per search, or 0.0 with nothing to search."""
        if not targets:
            return 0.0
        return elapsed(action, targets) / len(targets)

    # Plain Python ints: no float sample count and no 16-bit overflow.
    sizes = list(range(start, stop + 1, step))
    word_list = create_word_list(filename)

    load_list, load_BST, load_AVL = [], [], []
    find_list, find_BST, find_AVL = [], [], []

    for n in sizes:
        sample = word_list[:n]

        # Draw indices from the actual sample length; the slice is shorter
        # than n when the file holds fewer than n words.
        if sample:
            picks = np.random.randint(0, len(sample), size=searches_per_size)
            words_to_find = [sample[i] for i in picks]
        else:
            words_to_find = []

        linked = LinkedList()
        bst = BST()
        avl = AVL()

        load_list.append(elapsed(linked.add, sample))
        load_BST.append(elapsed(bst.insert, sample))
        load_AVL.append(elapsed(avl.insert, sample))

        find_list.append(
            average_search(lambda word: iterative_search(linked, word),
                           words_to_find))
        find_BST.append(average_search(bst.find, words_to_find))
        find_AVL.append(average_search(avl.find, words_to_find))

    plt.subplot(121)
    plt.plot(sizes, load_list, label='Singly-Linked List')
    plt.plot(sizes, load_BST, label='Binary Search Tree')
    plt.plot(sizes, load_AVL, label='AVL Tree')
    plt.legend()
    plt.xlabel('Data Points')
    plt.ylabel('Seconds')
    plt.title('Build Times')

    plt.subplot(122)
    plt.plot(sizes, find_list, label='Singly-Linked List')
    plt.plot(sizes, find_BST, label='Binary Search Tree')
    plt.plot(sizes, find_AVL, label='AVL Tree')
    plt.legend()
    plt.xlabel('Data Points')
    plt.ylabel('Seconds')
    plt.title('Search Times')

    plt.show()
def plot_times(filename="English.txt", start=500, stop=5500, step=500):
    """Plot build and search times for a LinkedList, a BST, and an AVL tree.

    For every n in start, start + step, ..., up to and including stop, take
    the first n words returned by create_word_list(filename), time how long
    it takes to load them into each structure, then pick 5 random words from
    that data set (without repetition) and record the average time to find
    one word in each structure.

    The figure has two subplots: build time against n, and average search
    time against n.

    Inputs:
        filename (str): the file to use in creating the data sets.
        start (int): the lower bound on the sample interval.
        stop (int): the upper bound on the sample interval (inclusive).
        step (int): the space between points in the sample interval.

    Returns:
        Show the plot, but do not return any values.

    Raises:
        ValueError: if step is not positive, or if stepping from start by
            step never lands exactly on stop.
    """
    searches_per_size = 5

    def average_search_time(search, targets):
        """Return the mean seconds per search(target) call, timed one by one."""
        # targets is a NumPy array, so test its length, not its truthiness.
        if len(targets) == 0:
            return 0.0
        total = 0
        for target in targets:
            began = time.time()
            search(target)
            total += time.time() - began
        return total / len(targets)

    # Validate before reading the file; a non-positive step would otherwise
    # never reach stop.
    if step <= 0:
        raise ValueError("step must be a positive integer")
    if (stop - start) % step != 0:
        raise ValueError("Your steps won't get you from start to stop")

    word_list = create_word_list(filename)

    time_linked_list = []
    time_BST_list = []
    time_AVL_list = []
    time_linked_list_search = []
    time_BST_list_search = []
    time_AVL_list_search = []
    set_size = []

    # stop + 1 makes the upper bound inclusive, as documented.
    for current in range(start, stop + 1, step):
        current_linked_list = LinkedList()
        current_BST = BST()
        current_AVL = AVL()
        current_list = word_list[:current]

        # The first few entries of a random permutation are a random sample
        # without replacement.
        to_search = np.random.permutation(current_list)[:searches_per_size]

        began = time.time()
        for word in current_list:
            current_linked_list.add(word)
        time_linked_list.append(time.time() - began)

        began = time.time()
        for word in current_list:
            current_BST.insert(word)
        time_BST_list.append(time.time() - began)

        began = time.time()
        for word in current_list:
            current_AVL.insert(word)
        time_AVL_list.append(time.time() - began)

        time_linked_list_search.append(average_search_time(
            lambda word: iterative_search(current_linked_list, word),
            to_search))
        time_BST_list_search.append(
            average_search_time(current_BST.find, to_search))
        time_AVL_list_search.append(
            average_search_time(current_AVL.find, to_search))

        set_size.append(current)

    plt.subplot(2, 1, 1)
    plt.title('Building Data Structures')
    plt.plot(set_size, time_linked_list, label='Linked List', linewidth=3)
    plt.plot(set_size, time_BST_list, label="BST", linewidth=3)
    plt.plot(set_size, time_AVL_list, label="AVL", linewidth=3)
    plt.legend(loc=2)

    plt.subplot(2, 1, 2)
    plt.title("Searching Data Structures")
    plt.plot(set_size, time_linked_list_search, label='Linked list', linewidth=3)
    plt.plot(set_size, time_BST_list_search, label='BST', linewidth=3)
    plt.plot(set_size, time_AVL_list_search, label='AVL', linewidth=3)
    plt.legend(loc=2)

    plt.show()